def cnn_tagger(width: int, vector_width: int, nr_classes: int = 17):
    with Model.define_operators({">>": chain}):
        model = strings2arrays() >> with_array(
            HashEmbed(nO=width, nV=vector_width, column=0)
            >> expand_window(window_size=1)
            >> Relu(nO=width, nI=width * 3)
            >> Relu(nO=width, nI=width)
            >> Softmax(nO=nr_classes, nI=width)
        )
    return model
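# Usage sketch (illustrative addition, not part of the original snippet):
# every layer above has its dimensions fixed at construction time, so the
# model can be initialized without sample data.
tagger = cnn_tagger(width=32, vector_width=16)
tagger.initialize()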
def build_text_classifier_v2(
    tok2vec: Model[List[Doc], List[Floats2d]],
    linear_model: Model[List[Doc], Floats2d],
    nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
    exclusive_classes = not linear_model.attrs["multi_label"]
    with Model.define_operators({">>": chain, "|": concatenate}):
        width = tok2vec.maybe_get_dim("nO")
        attention_layer = ParametricAttention(
            width
        )  # TODO: benchmark performance difference of this layer
        maxout_layer = Maxout(nO=width, nI=width)
        norm_layer = LayerNorm(nI=width)
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> attention_layer
            >> reduce_sum()
            >> residual(maxout_layer >> norm_layer >> Dropout(0.0))
        )
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
    if model.has_dim("nO") is not False:
        model.set_dim("nO", nO)
    model.set_ref("output_layer", linear_model.get_ref("output_layer"))
    model.set_ref("attention_layer", attention_layer)
    model.set_ref("maxout_layer", maxout_layer)
    model.set_ref("norm_layer", norm_layer)
    model.attrs["multi_label"] = not exclusive_classes
    model.init = init_ensemble_textcat
    return model
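# Added note on the wiring above: the "|" operator concatenates the outputs
# of the bag-of-words linear_model and the attention/CNN branch, so the
# output layer receives 2 * nO features, which is why nO_double is passed
# as its nI.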
def main(
    n_hidden: int = 256, dropout: float = 0.2, n_iter: int = 10, batch_size: int = 128
):
    # Define the model
    model: Model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Load the data
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    # Set any missing shapes for the model.
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    train_data = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
    dev_data = model.ops.multibatch(batch_size, dev_X, dev_Y)
    # Create the optimizer.
    optimizer = Adam(0.001)
    for i in range(n_iter):
        for X, Y in tqdm(train_data, leave=False):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in dev_data:
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        msg.row((i, f"{score:.3f}"), widths=(3, 5))
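# Added note on backprop(Yh - Y): for a softmax output trained with
# categorical cross-entropy, the error signal to push back is simply the
# difference between the predicted distribution and the one-hot target, so
# the example needs no separate loss layer.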
def test_model_gpu():
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # making sure the data is on the right device
    train_X = model.ops.asarray(train_X)
    train_Y = model.ops.asarray(train_Y)
    dev_X = model.ops.asarray(dev_X)
    dev_Y = model.ops.asarray(dev_Y)
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    batch_size = 128
    for i in range(2):
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for X, Y in batches:
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
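# Added note: prefer_gpu() switches Thinc's current ops to the GPU backend
# when one is available, and model.ops.asarray copies each dataset onto the
# same device as the model, so initialization and training run entirely on
# the GPU when present.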
def TransformersTagger(
    starter: str, n_tags: int = 17
) -> Model[List[List[str]], List[Floats2d]]:
    return chain(
        TransformersTokenizer(starter),
        Transformer(starter),
        with_array(Softmax(nO=n_tags)),
    )
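# Usage sketch (illustrative addition; the starter name is an assumption,
# and constructing the layers downloads pretrained weights):
tagger = TransformersTagger("bert-base-uncased", n_tags=17)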
def test_validation():
    model = chain(Relu(10), Relu(10), with_ragged(reduce_max()), Softmax())
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc2f(1, 10), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=model.ops.alloc3f(1, 10, 1), Y=model.ops.alloc2f(1, 10))
    with pytest.raises(DataValidationError):
        model.initialize(X=[model.ops.alloc2f(1, 10)], Y=model.ops.alloc2f(1, 10))
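# Added note: all three initialize() calls are expected to fail validation.
# The 3d array and the list of arrays are rejected by Relu, which expects a
# 2d float array, and the plain 2d input cannot satisfy the sequence-shaped
# input that with_ragged(reduce_max()) requires.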
def build_tagger_model(
    tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None
) -> Model[List[Doc], List[Floats2d]]:
    """Build a tagger model, using a provided token-to-vector component.
    The tagger model simply adds a linear layer with softmax activation to
    predict scores given the token vectors.

    tok2vec (Model[List[Doc], List[Floats2d]]): The token-to-vector subnetwork.
    nO (int or None): The number of tags to output. Inferred from the data if None.
    """
    # TODO: glorot_uniform_init seems to work a bit better than zero_init here?!
    t2v_width = tok2vec.get_dim("nO") if tok2vec.has_dim("nO") else None
    output_layer = Softmax(nO, t2v_width, init_W=zero_init)
    softmax = with_array(output_layer)  # type: ignore
    model = chain(tok2vec, softmax)
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("softmax", output_layer)
    model.set_ref("output_layer", output_layer)
    return model
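# Added note on with_array: it concatenates the per-Doc Floats2d arrays into
# one contiguous 2d array on the way into the softmax and splits the scores
# back into a per-Doc list on the way out, so the output layer only ever
# sees plain 2d arrays.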
def build_simple_cnn_text_classifier(
    tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity is
    applied instead, so that outputs are in the range [0, 1].
    """
    fill_defaults = {"b": 0, "W": 0}
    with Model.define_operators({">>": chain}):
        cnn = tok2vec >> list2ragged() >> reduce_mean()
        nI = tok2vec.maybe_get_dim("nO")
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nI)
            fill_defaults["b"] = NEG_VALUE
            resizable_layer: Model = resizable(
                output_layer,
                resize_layer=partial(
                    resize_linear_weighted, fill_defaults=fill_defaults
                ),
            )
            model = cnn >> resizable_layer
        else:
            output_layer = Linear(nO=nO, nI=nI)
            resizable_layer = resizable(
                output_layer,
                resize_layer=partial(
                    resize_linear_weighted, fill_defaults=fill_defaults
                ),
            )
            model = cnn >> resizable_layer >> Logistic()
        model.set_ref("output_layer", output_layer)
        model.attrs["resize_output"] = partial(
            resize_and_set_ref,
            resizable_layer=resizable_layer,
        )
    model.set_ref("tok2vec", tok2vec)
    model.set_dim("nO", nO)  # type: ignore  # TODO: remove type ignore once Thinc has been updated
    model.attrs["multi_label"] = not exclusive_classes
    return model
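# Added note on resizable: it lets the output layer grow when labels are
# added after training. In the softmax branch the bias fill value is set to
# NEG_VALUE, so newly added rows start with a large negative bias and fresh
# classes get near-zero probability until they receive updates.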
def build_multi_task_model(
    tok2vec: Model,
    maxout_pieces: int,
    token_vector_width: int,
    nO: Optional[int] = None,
) -> Model:
    softmax = Softmax(nO=nO, nI=token_vector_width * 2)
    model = chain(
        tok2vec,
        Maxout(
            nO=token_vector_width * 2,
            nI=token_vector_width,
            nP=maxout_pieces,
            dropout=0.0,
        ),
        LayerNorm(token_vector_width * 2),
        softmax,
    )
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("output_layer", softmax)
    return model
def build_simple_cnn_text_classifier(
    tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity is
    applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        cnn = tok2vec >> list2ragged() >> reduce_mean()
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
            model = cnn >> output_layer
            model.set_ref("output_layer", output_layer)
        else:
            linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
            model = cnn >> linear_layer >> Logistic()
            model.set_ref("output_layer", linear_layer)
    model.set_ref("tok2vec", tok2vec)
    model.set_dim("nO", nO)
    model.attrs["multi_label"] = not exclusive_classes
    return model
def create_relu_softmax(width, dropout, nI, nO):
    return chain(clone(Relu(nO=width, dropout=dropout), 2), Softmax(10, width))
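# Added note: nI and nO are accepted here (typically so the signature matches
# a registered config schema) but are unused in the body; the Softmax output
# size is hard-wired to 10 classes.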
def create_embed_relu_relu_softmax(depth, width, vector_length):
    with Model.define_operators({">>": chain}):
        model = strings2arrays() >> with_array(
            HashEmbed(width, vector_length)
            >> expand_window(window_size=1)
            >> Relu(width, width * 3)
            >> Relu(width, width)
            >> Softmax(17, width)
        )
    return model
from thinc.api import chain, Relu, reduce_max, Softmax, add

bad_model = chain(Relu(10), reduce_max(), Softmax())

bad_model2 = add(Relu(10), reduce_max(), Softmax())
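# Added note: under static type checking (e.g. mypy with Thinc's type
# annotations), both models are flagged: reduce_max expects Ragged input
# while Relu produces a plain 2d float array, so the adjacent layer types do
# not line up in either chain or add.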
from thinc.api import chain, Relu, reduce_max, Softmax, add

good_model = chain(Relu(10), Relu(10), Softmax())
reveal_type(good_model)

good_model2 = add(Relu(10), Relu(10), Softmax())
reveal_type(good_model2)

bad_model_undetected = chain(Relu(10), Relu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected)

bad_model_undetected2 = add(Relu(10), Relu(10), reduce_max(), Softmax())
reveal_type(bad_model_undetected2)
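# Added note: reveal_type is evaluated by the type checker, not at runtime.
# As the names suggest, the *_undetected models contain the same
# Ragged/Floats2d mismatch as before but slip past the static checks, which
# only cover a limited number of combinator arguments.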
from typing import Any, TypeVar

from thinc.api import chain, Relu, reduce_max, Softmax, add, Model

good_model = chain(Relu(10), Relu(10), Softmax())
reveal_type(good_model)

good_model2 = add(Relu(10), Relu(10), Softmax())
reveal_type(good_model2)

bad_model_undetected = chain(Relu(10), Relu(10), Relu(10), Relu(10), Softmax())
reveal_type(bad_model_undetected)

bad_model_undetected2 = add(Relu(10), Relu(10), Relu(10), Relu(10), Softmax())
reveal_type(bad_model_undetected2)


def forward() -> None:
    pass


OtherType = TypeVar("OtherType")


def other_function(
    layer1: Model, layer2: Model, *layers: Model
) -> Model[Any, OtherType]:
    return Model("some_model", forward)


non_combinator_model = other_function(
    Model("x", forward), Model("y", forward), Model("z", forward)
)
def TextCatEnsemble_v1(
    width: int,
    embed_size: int,
    pretrained_vectors: Optional[bool],
    exclusive_classes: bool,
    ngram_size: int,
    window_size: int,
    conv_depth: int,
    dropout: Optional[float],
    nO: Optional[int] = None,
) -> Model:
    # Don't document this yet, I'm not sure it's right.
    cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        lower = HashEmbed(
            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout, seed=10
        )
        prefix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(PREFIX),
            dropout=dropout,
            seed=11,
        )
        suffix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SUFFIX),
            dropout=dropout,
            seed=12,
        )
        shape = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SHAPE),
            dropout=dropout,
            seed=13,
        )
        width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
        trained_vectors = FeatureExtractor(cols) >> with_array(
            uniqued(
                (lower | prefix | suffix | shape)
                >> Maxout(nO=width, nI=width_nI, normalize=True),
                column=cols.index(ORTH),
            )
        )
        if pretrained_vectors:
            static_vectors = StaticVectors(width)
            vector_layer = trained_vectors | static_vectors
            vectors_width = width * 2
        else:
            vector_layer = trained_vectors
            vectors_width = width
        tok2vec = vector_layer >> with_array(
            Maxout(width, vectors_width, normalize=True)
            >> residual(
                expand_window(window_size=window_size)
                >> Maxout(
                    nO=width, nI=width * ((window_size * 2) + 1), normalize=True
                )
            )
            ** conv_depth,
            pad=conv_depth,
        )
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> ParametricAttention(width)
            >> reduce_sum()
            >> residual(Maxout(nO=width, nI=width))
            >> Linear(nO=nO, nI=width)
            >> Dropout(0.0)
        )
        linear_model = build_bow_text_classifier(
            nO=nO,
            ngram_size=ngram_size,
            exclusive_classes=exclusive_classes,
            no_output_layer=False,
        )
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
    if model.has_dim("nO") is not False:
        model.set_dim("nO", nO)
    model.set_ref("output_layer", linear_model.get_ref("output_layer"))
    model.attrs["multi_label"] = not exclusive_classes
    return model
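# Added note on the embedding widths: lower contributes width features and
# prefix, suffix and shape contribute width // 2 each, so
# width_nI = width + 3 * (width // 2), the nI of the Maxout that mixes them.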