def test_replace_node():
    """Check that ``replace_node`` swaps a node in layers, refs and nested layers."""
    original = Relu(5)
    untouched = Relu(5)
    nested = chain(original, untouched)
    replacement = with_debug(original)
    model = Model(
        "test",
        lambda X: (X, lambda dY: dY),
        layers=[original, untouched, original, nested],
        refs={"relu1": original, "relu2": untouched, "relu3": original},
    )

    model.replace_node(original, replacement)

    # Top-level layers: every occurrence of the original node is swapped,
    # the other node is left alone.
    expected_layers = (replacement, untouched, replacement)
    for position, expected in enumerate(expected_layers):
        assert model.layers[position] == expected

    # Refs pointing at the original node now point at the replacement.
    expected_refs = (
        ("relu1", replacement),
        ("relu2", untouched),
        ("relu3", replacement),
    )
    for ref_name, expected in expected_refs:
        assert model.get_ref(ref_name) == expected

    # Check that nodes are replaced recursively: the chain object itself is
    # kept, but its children are updated.
    inner = model.layers[3]
    assert inner == nested
    assert inner.layers[0] == replacement
    assert inner.layers[1] == untouched
def _resume_model(
    model: Model, resume_path: Path, epoch_resume: int, silent: bool = True
) -> int:
    """Load saved tok2vec weights into ``model`` and determine the resume epoch.

    Args:
        model: Model exposing a "tok2vec" reference; the weights are loaded
            into that reference via ``from_bytes``.
        resume_path: File containing the serialized tok2vec weights.
        epoch_resume: Epoch to resume from. Only used when the file name does
            not follow the default ``model<N>.bin`` pattern; otherwise the
            epoch parsed from the file name takes precedence.
        silent: When True, nothing is printed.

    Returns:
        The epoch to resume from. Previously this value was computed and
        then discarded, so callers could never see the inferred epoch.
    """
    msg = Printer(no_print=silent)
    msg.info(f"Resume training tok2vec from: {resume_path}")
    with resume_path.open("rb") as file_:
        weights_data = file_.read()
        model.get_ref("tok2vec").from_bytes(weights_data)
    # Default weight files are named 'model<N>.bin', where N is the last
    # completed epoch, so training continues at N + 1.
    model_name = re.search(r"model(\d+)\.bin", str(resume_path))
    if model_name:
        epoch_resume = int(model_name.group(1)) + 1
    msg.info(f"Resuming from epoch: {epoch_resume}")
    return epoch_resume
def instance_init(model: Model, X: List[Doc] = None, Y: Floats2d = None) -> Model:
    """Initialize the model's "tok2vec" reference with ``X`` when it is given.

    ``Y`` is accepted but not used. The model itself is returned.
    """
    # The reference is looked up unconditionally, so a model without a
    # "tok2vec" ref fails here even when no sample data is passed.
    embedder = model.get_ref("tok2vec")
    if X is None:
        return model
    embedder.initialize(X)
    return model
def get_tok2vec_width(model: Model):
    """Return the "nO" dimension of the model's tok2vec reference, if available.

    The dimension is read from the "tok2vec" reference itself when it has
    "nO"; otherwise from that reference's "listener" reference. Returns None
    when the model has no "tok2vec" reference or neither source applies.
    """
    if not model.has_ref("tok2vec"):
        return None
    embedder = model.get_ref("tok2vec")
    if embedder.has_dim("nO"):
        return embedder.get_dim("nO")
    if embedder.has_ref("listener"):
        listener = embedder.get_ref("listener")
        return listener.get_dim("nO")
    return None
def _resume_model(
    model: Model, resume_path: Path, epoch_resume: Optional[int], silent: bool = True
) -> int:
    """Restore tok2vec weights from ``resume_path`` and return the resume epoch.

    When ``epoch_resume`` is None, the epoch is inferred from a file name of
    the form ``model<N>.bin`` (giving N + 1). If it cannot be inferred, a
    ValueError with ``Errors.E1020`` is raised.
    """
    printer = Printer(no_print=silent)
    printer.info(f"Resume training tok2vec from: {resume_path}")
    with resume_path.open("rb") as weights_file:
        serialized = weights_file.read()
        model.get_ref("tok2vec").from_bytes(serialized)

    if epoch_resume is None:
        # Parse the epoch number from the given weight file
        found = re.search(r"model\d+\.bin", str(resume_path))
        if found is None:
            # No epoch given and couldn't infer it
            raise ValueError(Errors.E1020)
        # Default weight file name: strip the 'model' prefix and the '.bin'
        # suffix to get the last saved epoch, then continue with the next one.
        file_name = found.group(0)
        last_epoch = int(file_name[len("model"):-len(".bin")])
        epoch_resume = last_epoch + 1

    printer.info(f"Resuming from epoch: {epoch_resume}")
    return epoch_resume
def debug_model(
    config,
    resolved_train_config,
    nlp,
    model: Model,
    *,
    print_settings: Optional[Dict[str, Any]] = None,
):
    """Initialize, briefly update and run a model, printing its state on request.

    Args:
        config: Config passed to ``resolve_dot_names`` to resolve the
            training corpus.
        resolved_train_config: Mapping whose "train_corpus" entry holds the
            dot name of the training corpus.
        nlp: Pipeline object; used to initialize the model and to fetch the
            "tok2vec" pipe when the model uses a tok2vec listener.
        model: The model to analyse. Anything that is not a ``Model`` is
            reported through ``msg.fail(..., exits=1)``.
        print_settings: Optional flags. The keys read here are
            "print_before_training", "print_after_init",
            "print_after_training" and "print_prediction"; the whole dict is
            also forwarded to ``_print_model``.
    """
    if not isinstance(model, Model):
        msg.fail(
            f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
            exits=1,
        )
    if print_settings is None:
        print_settings = {}

    # STEP 0: Printing before training
    msg.info(f"Analysing model with ID {model.id}")
    if print_settings.get("print_before_training"):
        msg.divider("STEP 0 - before training")
        _print_model(model, print_settings)

    # STEP 1: Initializing the model and printing again
    X = _get_docs()
    # The output vector might differ from the official type of the output layer
    with data_validation(False):
        try:
            dot_names = [resolved_train_config["train_corpus"]]
            with show_validation_error():
                (train_corpus,) = resolve_dot_names(config, dot_names)
                nlp.initialize(lambda: train_corpus(nlp))
            msg.info("Initialized the model with the training corpus.")
        except ValueError:
            # Fall back to the dummy docs when the training corpus cannot be
            # used for initialization.
            try:
                _set_output_dim(nO=7, model=model)
                with show_validation_error():
                    nlp.initialize(lambda: [Example.from_dict(x, {}) for x in X])
                msg.info("Initialized the model with dummy data.")
            except Exception:
                msg.fail(
                    "Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.",
                    exits=1,
                )

    if print_settings.get("print_after_init"):
        msg.divider("STEP 1 - after initialization")
        _print_model(model, print_settings)

    # STEP 2: Updating the model and printing again
    optimizer = Adam(0.001)
    set_dropout_rate(model, 0.2)
    # ugly hack to deal with Tok2Vec listeners
    tok2vec = None
    if (
        model.has_ref("tok2vec")
        and model.get_ref("tok2vec").name == "tok2vec-listener"
    ):
        tok2vec = nlp.get_pipe("tok2vec")
    goldY = None
    for _ in range(3):
        if tok2vec:
            # Run the tok2vec pipe on the same docs before the model's own
            # forward pass.
            tok2vec.update([Example.from_dict(x, {}) for x in X])
        Y, get_dX = model.begin_update(X)
        if goldY is None:
            # The simulated gold output is derived once from the first
            # prediction and reused for the remaining iterations.
            goldY = _simulate_gold(Y)
        dY = get_gradient(goldY, Y, model.ops)
        get_dX(dY)
        model.finish_update(optimizer)
    if print_settings.get("print_after_training"):
        msg.divider("STEP 2 - after training")
        _print_model(model, print_settings)

    # STEP 3: the final prediction
    # predict() is always called, even when its result is not printed.
    prediction = model.predict(X)
    if print_settings.get("print_prediction"):
        msg.divider("STEP 3 - prediction")
        msg.info(str(prediction))

    msg.good("Successfully ended analysis - model looks good.")
def test_model_init():
    """Exercise the param, grad, dim, ref and attr accessors of a freshly built Model."""

    class MyShim(Shim):
        name = "testshim"

    model_a = create_model("a")
    # "nO", "b" (param) and "b" (ref) are declared with None so the
    # declared-but-unset paths can be checked below.
    model = Model(
        "test",
        lambda X: (X, lambda dY: dY),
        dims={"nI": 10, "nO": None},
        params={"W": numpy.zeros((10,)), "b": None},
        refs={"a": model_a, "b": None},
        attrs={"foo": "bar"},
        shims=[MyShim(None)],
        layers=[model_a, model_a],
    )

    # --- Params ---
    assert model.has_param("W")
    assert model.get_param("W").shape == (10,)
    # A param declared with None is expected to report None from has_param
    # and to raise on access.
    assert model.has_param("b") is None
    with pytest.raises(KeyError):
        model.get_param("b")
    # Unknown param: raises until it is set.
    with pytest.raises(KeyError):
        model.get_param("X")
    model.set_param("X", numpy.zeros((10,)))
    assert model.has_param("X")
    assert model.get_param("X").shape == (10,)
    # use_params overrides the value only inside the context; afterwards the
    # zeros set above are expected again.
    with model.use_params({(model.id, "X"): numpy.ones((10,))}):
        assert numpy.array_equal(model.get_param("X"), numpy.ones((10,)))
    assert numpy.array_equal(model.get_param("X"), numpy.zeros((10,)))

    # --- Gradients ---
    assert not model.has_grad("W")
    assert not model.has_grad("xyz")
    with pytest.raises(KeyError):
        model.get_grad("b")
    model.set_param("W", model.ops.alloc1f(10))
    model.set_grad("W", model.ops.alloc1f(10))
    # Incrementing with an array of a different shape is expected to fail.
    with pytest.raises(ValueError):
        model.inc_grad("W", numpy.zeros((5, 0)))

    # --- Dims ---
    assert model.has_dim("nI")
    assert model.get_dim("nI") == 10
    # Unknown dim name -> KeyError; declared-but-unset dim ("nO") -> ValueError.
    with pytest.raises(KeyError):
        model.get_dim("xyz")
    with pytest.raises(ValueError):
        model.get_dim("nO")
    with pytest.raises(KeyError):
        model.set_dim("xyz", 20)
    # "nI" already holds 10; setting it to 20 is expected to raise.
    with pytest.raises(ValueError):
        model.set_dim("nI", 20)

    # --- Refs ---
    assert model.has_ref("a")
    assert model.get_ref("a").name == "a"
    assert not model.has_ref("xyz")
    with pytest.raises(KeyError):
        model.get_ref("xyz")
    # A ref declared with None is expected to report None from has_ref and
    # to raise ValueError on access.
    assert model.has_ref("b") is None
    with pytest.raises(ValueError):
        model.get_ref("b")
    model.set_ref("c", model_a)
    assert model.has_ref("c")
    assert model.get_ref("c").name == "a"
    # Setting a ref to a newly created model (not one of the layers passed
    # above) is expected to raise.
    with pytest.raises(ValueError):
        model.set_ref("c", create_model("c"))

    # --- Attrs ---
    assert "foo" in model.attrs
    assert "bar" not in model.attrs
    assert model.attrs["foo"] == "bar"
    with pytest.raises(KeyError):
        model.attrs["bar"]
    model.attrs["bar"] = "baz"

    # --- Copy ---
    model_copy = model.copy()
    assert model_copy.name == "test"