def build_text_classifier_v2( tok2vec: Model[List[Doc], List[Floats2d]], linear_model: Model[List[Doc], Floats2d], nO: Optional[int] = None, ) -> Model[List[Doc], Floats2d]: exclusive_classes = not linear_model.attrs["multi_label"] with Model.define_operators({">>": chain, "|": concatenate}): width = tok2vec.maybe_get_dim("nO") attention_layer = ParametricAttention( width) # TODO: benchmark performance difference of this layer maxout_layer = Maxout(nO=width, nI=width) norm_layer = LayerNorm(nI=width) cnn_model = ( tok2vec >> list2ragged() >> attention_layer >> reduce_sum() >> residual(maxout_layer >> norm_layer >> Dropout(0.0))) nO_double = nO * 2 if nO else None if exclusive_classes: output_layer = Softmax(nO=nO, nI=nO_double) else: output_layer = Linear(nO=nO, nI=nO_double) >> Logistic() model = (linear_model | cnn_model) >> output_layer model.set_ref("tok2vec", tok2vec) if model.has_dim("nO") is not False: model.set_dim("nO", nO) model.set_ref("output_layer", linear_model.get_ref("output_layer")) model.set_ref("attention_layer", attention_layer) model.set_ref("maxout_layer", maxout_layer) model.set_ref("norm_layer", norm_layer) model.attrs["multi_label"] = not exclusive_classes model.init = init_ensemble_textcat return model
def build_cloze_characters_multi_task_model( vocab: "Vocab", tok2vec: Model, maxout_pieces: int, hidden_size: int, nr_char: int ) -> Model: output_layer = chain( list2array(), Maxout(nO=hidden_size, nP=maxout_pieces), LayerNorm(nI=hidden_size), MultiSoftmax([256] * nr_char, nI=hidden_size), ) model = build_masked_language_model(vocab, chain(tok2vec, output_layer)) model.set_ref("tok2vec", tok2vec) model.set_ref("output_layer", output_layer) return model
def build_multi_task_model( tok2vec: Model, maxout_pieces: int, token_vector_width: int, nO: Optional[int] = None, ) -> Model: softmax = Softmax(nO=nO, nI=token_vector_width * 2) model = chain( tok2vec, Maxout( nO=token_vector_width * 2, nI=token_vector_width, nP=maxout_pieces, dropout=0.0, ), LayerNorm(token_vector_width * 2), softmax, ) model.set_ref("tok2vec", tok2vec) model.set_ref("output_layer", softmax) return model