def test_small_end_to_end(width, nb_epoch, min_score, create_model, mnist):
    batch_size = 128
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = mnist
    model = create_model(width, dropout, nI=train_X.shape[1], nO=train_Y.shape[1])
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    losses = []
    scores = []
    for i in range(nb_epoch):
        for X, Y in model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
            losses.append(((Yh - Y) ** 2).sum())
        # Evaluate accuracy on the dev set after each epoch
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        scores.append(score)
    assert losses[-1] < losses[0], losses
    if scores[0] < 1.0:
        assert scores[-1] > scores[0], scores
    assert any([score > min_score for score in scores]), scores
def test_tensorflow_wrapper_accumulate_gradients(model, X, Y, answer):
    import tensorflow as tf

    optimizer = Adam()
    gradients = []
    for i in range(3):
        guesses, backprop = model(X, is_train=True)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        shim_grads = [tf.identity(var) for var in model.shims[0].gradients]
        gradients.append(shim_grads)

    # Apply the gradients
    model.finish_update(optimizer)
    assert model.shims[0].gradients is None

    # Compare prev/next pairs and ensure their gradients have changed
    for i in range(len(gradients)):
        # Skip the first one
        if i == 0:
            continue
        found_diff = False
        curr_grads = gradients[i]
        prev_grads = gradients[i - 1]
        for curr, prev in zip(curr_grads, prev_grads):
            if (prev != curr).numpy().any():
                found_diff = True
        assert found_diff is True
def main(
    n_hidden: int = 256, dropout: float = 0.2, n_iter: int = 10, batch_size: int = 128
):
    # Define the model
    model: Model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Load the data
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    # Set any missing shapes for the model.
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    train_data = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
    dev_data = model.ops.multibatch(batch_size, dev_X, dev_Y)
    # Create the optimizer.
    optimizer = Adam(0.001)
    for i in range(n_iter):
        for X, Y in tqdm(train_data, leave=False):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in dev_data:
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        msg.row((i, f"{score:.3f}"), widths=(3, 5))
def test_model_gpu():
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # making sure the data is on the right device
    train_X = model.ops.asarray(train_X)
    train_Y = model.ops.asarray(train_Y)
    dev_X = model.ops.asarray(dev_X)
    dev_Y = model.ops.asarray(dev_Y)

    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    batch_size = 128

    for i in range(2):
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for X, Y in batches:
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
def parser(vocab):
    vocab.strings.add("ROOT")
    config = {
        "learn_tokens": False,
        "min_action_freq": 30,
        "update_with_oracle_cut_size": 100,
    }
    cfg = {"model": DEFAULT_PARSER_MODEL}
    model = registry.resolve(cfg, validate=True)["model"]
    parser = DependencyParser(vocab, model, **config)
    parser.cfg["token_vector_width"] = 4
    parser.cfg["hidden_width"] = 32
    # parser.add_label('right')
    parser.add_label("left")
    parser.initialize(lambda: [_parser_example(parser)])
    sgd = Adam(0.001)

    for i in range(10):
        losses = {}
        doc = Doc(vocab, words=["a", "b", "c", "d"])
        example = Example.from_dict(
            doc, {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
        )
        parser.update([example], sgd=sgd, losses=losses)
    return parser
def test_tensorflow_wrapper_train_overfits(model, X, Y, answer):
    optimizer = Adam()
    for i in range(100):
        guesses, backprop = model(X, is_train=True)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        model.finish_update(optimizer)
    predicted = model.predict(X).argmax()
    assert predicted == answer
def test_mxnet_wrapper_train_overfits(
    model: Model[Array2d, Array2d], X: Array2d, Y: Array1d, answer: int
):
    optimizer = Adam()
    for i in range(100):
        guesses, backprop = model(X, is_train=True)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        model.finish_update(optimizer)
    predicted = model.predict(X).argmax()
    assert predicted == answer
def test_add_label(parser):
    parser = _train_parser(parser)
    parser.add_label("right")
    sgd = Adam(0.001)
    for i in range(100):
        losses = {}
        parser.update([_parser_example(parser)], sgd=sgd, losses=losses)
    doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
    doc = parser(doc)
    assert doc[0].dep_ == "right"
    assert doc[2].dep_ == "left"
def test_tensorflow_wrapper_use_params(model, X, Y, answer):
    optimizer = Adam()
    with model.use_params(optimizer.averages):
        assert model.predict(X).argmax() is not None
    for i in range(10):
        guesses, backprop = model.begin_update(X)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        model.finish_update(optimizer)
    with model.use_params(optimizer.averages):
        predicted = model.predict(X).argmax()
    assert predicted == answer
def get_updated_model():
    fix_random_seed(seed)
    optimizer = Adam(0.001)
    model = model_func(**kwargs).initialize()
    initial_params = get_all_params(model)
    set_dropout_rate(model, dropout)
    for _ in range(5):
        Y, get_dX = model.begin_update(get_X())
        dY = get_gradient(model, Y)
        get_dX(dY)
        model.finish_update(optimizer)
    updated_params = get_all_params(model)
    # After five updates the parameters should differ from the initial ones
    with pytest.raises(AssertionError):
        assert_array_equal(initial_params, updated_params)
    return model
def _train_parser(parser):
    fix_random_seed(1)
    parser.add_label("left")
    parser.initialize(lambda: [_parser_example(parser)])
    sgd = Adam(0.001)

    for i in range(5):
        losses = {}
        doc = Doc(parser.vocab, words=["a", "b", "c", "d"])
        gold = {"heads": [1, 1, 3, 3], "deps": ["left", "ROOT", "left", "ROOT"]}
        example = Example.from_dict(doc, gold)
        parser.update([example], sgd=sgd, losses=losses)
    return parser
def test_tensorflow_wrapper_accepts_optimizer(model, tf_model, X, Y, answer):
    # Update the optimizer weights
    optimizer = Adam()
    for i in range(10):
        guesses, backprop = model(X, is_train=True)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        model.finish_update(optimizer)

    # Pass the existing optimizer to a new wrapper shim
    wrapped = TensorFlowWrapper(tf_model, optimizer=model.shims[0]._optimizer)

    assert model.shims[0]._optimizer is not None
    assert wrapped.shims[0]._optimizer is not None

    weights_model = model.shims[0]._optimizer.get_weights()
    weights_wrapped = wrapped.shims[0]._optimizer.get_weights()
    for w1, w2 in zip(weights_model, weights_wrapped):
        assert numpy.array_equal(w1, w2)
def test_tensorflow_wrapper_serialize_model_subclass(
    X, Y, input_size, n_classes, answer
):
    import tensorflow as tf

    input_shape = (1, input_size)
    ops = get_current_ops()

    @keras_subclass(
        "foo.v1",
        X=ops.alloc2f(*input_shape),
        Y=to_categorical(ops.asarray1i([1]), n_classes=n_classes),
        input_shape=input_shape,
    )
    class CustomKerasModel(tf.keras.Model):
        def __init__(self, **kwargs):
            super(CustomKerasModel, self).__init__(**kwargs)
            self.in_dense = tf.keras.layers.Dense(
                12, name="in_dense", input_shape=input_shape
            )
            self.out_dense = tf.keras.layers.Dense(
                n_classes, name="out_dense", activation="softmax"
            )

        def call(self, inputs) -> tf.Tensor:
            x = self.in_dense(inputs)
            return self.out_dense(x)

    model = TensorFlowWrapper(CustomKerasModel())
    # Train the model to predict the right single answer
    optimizer = Adam()
    for i in range(50):
        guesses, backprop = model(X, is_train=True)
        d_guesses = (guesses - Y) / guesses.shape[0]
        backprop(d_guesses)
        model.finish_update(optimizer)
    predicted = model.predict(X).argmax()
    assert predicted == answer

    # Save then Load the model from bytes
    model.from_bytes(model.to_bytes())
    # The from_bytes model gets the same answer
    assert model.predict(X).argmax() == answer
def test_small_end_to_end(depth, width, vector_width, nb_epoch, create_model, ancora):
    (train_X, train_Y), (dev_X, dev_Y) = ancora
    batch_size = 8
    model = create_model(depth, width, vector_width).initialize()
    optimizer = Adam(0.001)
    losses = []
    scores = []
    for _ in range(nb_epoch):
        losses.append(0.0)
        for X, Y in get_shuffled_batches(train_X, train_Y, batch_size):
            Yh, backprop = model.begin_update(X)
            # Accumulate the gradient and squared error for each sequence in the batch
            d_loss = []
            for i in range(len(Yh)):
                d_loss.append(Yh[i] - Y[i])
                losses[-1] += ((Yh[i] - Y[i]) ** 2).sum()
            backprop(d_loss)
            model.finish_update(optimizer)
        scores.append(evaluate_tagger(model, dev_X, dev_Y, batch_size))
    assert losses[-1] < losses[0]
    assert scores[-1] > scores[0]
def debug_model(
    config,
    resolved_train_config,
    nlp,
    model: Model,
    *,
    print_settings: Optional[Dict[str, Any]] = None,
):
    if not isinstance(model, Model):
        msg.fail(
            f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
            exits=1,
        )
    if print_settings is None:
        print_settings = {}

    # STEP 0: Printing before training
    msg.info(f"Analysing model with ID {model.id}")
    if print_settings.get("print_before_training"):
        msg.divider(f"STEP 0 - before training")
        _print_model(model, print_settings)

    # STEP 1: Initializing the model and printing again
    X = _get_docs()
    # The output vector might differ from the official type of the output layer
    with data_validation(False):
        try:
            dot_names = [resolved_train_config["train_corpus"]]
            with show_validation_error():
                (train_corpus,) = resolve_dot_names(config, dot_names)
                nlp.initialize(lambda: train_corpus(nlp))
            msg.info("Initialized the model with the training corpus.")
        except ValueError:
            try:
                _set_output_dim(nO=7, model=model)
                with show_validation_error():
                    nlp.initialize(lambda: [Example.from_dict(x, {}) for x in X])
                msg.info("Initialized the model with dummy data.")
            except Exception:
                msg.fail(
                    "Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.",
                    exits=1,
                )
    if print_settings.get("print_after_init"):
        msg.divider(f"STEP 1 - after initialization")
        _print_model(model, print_settings)

    # STEP 2: Updating the model and printing again
    optimizer = Adam(0.001)
    set_dropout_rate(model, 0.2)
    # ugly hack to deal with Tok2Vec listeners
    tok2vec = None
    if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
        tok2vec = nlp.get_pipe("tok2vec")
    goldY = None
    for e in range(3):
        if tok2vec:
            tok2vec.update([Example.from_dict(x, {}) for x in X])
        Y, get_dX = model.begin_update(X)
        if goldY is None:
            goldY = _simulate_gold(Y)
        dY = get_gradient(goldY, Y, model.ops)
        get_dX(dY)
        model.finish_update(optimizer)
    if print_settings.get("print_after_training"):
        msg.divider(f"STEP 2 - after training")
        _print_model(model, print_settings)

    # STEP 3: the final prediction
    prediction = model.predict(X)
    if print_settings.get("print_prediction"):
        msg.divider(f"STEP 3 - prediction")
        msg.info(str(prediction))

    msg.good(f"Successfully ended analysis - model looks good.")
def debug_model(
    config,
    resolved_train_config,
    nlp,
    pipe,
    *,
    print_settings: Optional[Dict[str, Any]] = None,
):
    if not hasattr(pipe, "model"):
        msg.fail(
            f"The component '{pipe}' does not specify an object that holds a Model.",
            exits=1,
        )
    model = pipe.model
    if not isinstance(model, Model):
        msg.fail(
            f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
            exits=1,
        )
    if print_settings is None:
        print_settings = {}

    # STEP 0: Printing before training
    msg.info(f"Analysing model with ID {model.id}")
    if print_settings.get("print_before_training"):
        msg.divider(f"STEP 0 - before training")
        _print_model(model, print_settings)

    # STEP 1: Initializing the model and printing again
    with data_validation(False):
        try:
            dot_names = [resolved_train_config["train_corpus"]]
            with show_validation_error():
                (train_corpus,) = resolve_dot_names(config, dot_names)
                nlp.initialize(lambda: train_corpus(nlp))
            msg.info("Initialized the model with the training corpus.")
            examples = list(itertools.islice(train_corpus(nlp), 5))
        except ValueError:
            try:
                _set_output_dim(nO=7, model=model)
                with show_validation_error():
                    examples = [Example.from_dict(x, {}) for x in _get_docs()]
                    nlp.initialize(lambda: examples)
                msg.info("Initialized the model with dummy data.")
            except Exception:
                msg.fail(
                    "Could not initialize the model: you'll have to provide a valid 'train_corpus' argument in the config file.",
                    exits=1,
                )
    if print_settings.get("print_after_init"):
        msg.divider(f"STEP 1 - after initialization")
        _print_model(model, print_settings)

    # STEP 2: Updating the model and printing again
    optimizer = Adam(0.001)
    set_dropout_rate(model, 0.2)
    # ugly hack to deal with Tok2Vec/Transformer listeners
    upstream_component = None
    if model.has_ref("tok2vec") and "tok2vec-listener" in model.get_ref("tok2vec").name:
        upstream_component = nlp.get_pipe("tok2vec")
    if model.has_ref("tok2vec") and "transformer-listener" in model.get_ref("tok2vec").name:
        upstream_component = nlp.get_pipe("transformer")
    goldY = None
    for e in range(3):
        if upstream_component:
            upstream_component.update(examples)
        pipe.update(examples)
    if print_settings.get("print_after_training"):
        msg.divider(f"STEP 2 - after training")
        _print_model(model, print_settings)

    # STEP 3: the final prediction
    prediction = model.predict([ex.predicted for ex in examples])
    if print_settings.get("print_prediction"):
        msg.divider(f"STEP 3 - prediction")
        msg.info(str(prediction))

    msg.good(f"Successfully ended analysis - model looks good.")