# Imports assumed for spaCy 2.x / Thinc 7.x; adjust to the project's actual layout.
from thinc.api import chain, concatenate
from thinc.v2v import Model, Maxout, Affine
from thinc.misc import LayerNorm as LN
from thinc.t2v import Pooling, mean_pool
from spacy._ml import flatten, flatten_add_lengths, zero_init, logistic, create_default_optimizer


def create_pretraining_model(nlp, tok2vec, objective="basic"):
    """Define a network for the pretraining."""
    output_size = nlp.vocab.vectors.data.shape[1]
    # This is annoying, but the parser etc. have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    with Model.define_operators({">>": chain, "|": concatenate}):
        l2r_model = (
            tok2vec.l2r
            >> flatten
            >> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )
        r2l_model = (
            tok2vec.r2l
            >> flatten
            >> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )
        model = tok2vec.embed >> (l2r_model | r2l_model)
    model.tok2vec = tok2vec
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    # Quick sanity check that the tok2vec runs, and to show the output width.
    tokvecs = tok2vec([nlp.make_doc("hello there"), nlp.make_doc("and hello")])
    print(tokvecs.shape)
    return model
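
# A minimal usage sketch, not part of the original code: it assumes `nlp` is a
# pipeline with static vectors loaded, `tok2vec` exposes .embed/.l2r/.r2l as the
# function above expects, and `docs` is a list of Doc objects. It shows one
# pretraining-style update that pushes the per-token predictions towards the
# static vectors. Because the L2R and R2L heads are concatenated, the prediction
# is twice the vector width, so the target is tiled here purely for illustration.
import numpy


def example_pretraining_step(nlp, tok2vec, docs):
    model = create_pretraining_model(nlp, tok2vec)
    sgd = create_default_optimizer(model.ops)
    prediction, bp_prediction = model.begin_update(docs, drop=0.2)
    # Target: each token's static vector, duplicated to match the
    # concatenated (l2r | r2l) output width.
    target = numpy.vstack([token.vector for doc in docs for token in doc])
    target = numpy.hstack([target, target])
    d_prediction = (prediction - target) / prediction.shape[0]
    loss = float((d_prediction ** 2).sum())
    bp_prediction(d_prediction, sgd=sgd)
    return loss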
def _build_network(self, orig_width, hidden_width):
    with Model.define_operators({">>": chain}):
        # Very simple encoder-decoder model: compress the vectors from
        # orig_width down to hidden_width, then reconstruct them.
        self.encoder = Affine(hidden_width, orig_width)
        self.model = self.encoder >> zero_init(
            Affine(orig_width, hidden_width, drop_factor=0.0))
    self.sgd = create_default_optimizer(self.model.ops)
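
# A hedged usage sketch, not from the original code: assuming `self` is the object
# that called _build_network above and `vectors` is a float array of shape
# (n_rows, orig_width), one reconstruction update with the default optimizer
# would look roughly like this.
def example_autoencoder_step(self, vectors):
    reconstructed, bp_reconstruct = self.model.begin_update(vectors, drop=0.0)
    # Mean-squared reconstruction error against the original vectors.
    d_reconstructed = (reconstructed - vectors) / vectors.shape[0]
    loss = float((d_reconstructed ** 2).sum())
    bp_reconstruct(d_reconstructed, sgd=self.sgd)
    # After training, the encoder alone yields the reduced-width vectors:
    # self.encoder(vectors)
    return loss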
def sigmoid_last_hidden(nr_class, *, exclusive_classes=False, **cfg):
    width = cfg["token_vector_width"]
    return chain(
        get_last_hidden,
        flatten_add_lengths,
        Pooling(mean_pool),
        zero_init(Affine(nr_class, width, drop_factor=0.0)),
        logistic,
    )
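
# A hedged usage sketch (the values below are assumptions, not from the original
# code): `get_last_hidden` is expected to be defined elsewhere in this project and
# to return a list of per-doc arrays of last hidden states. With that in place,
# the head mean-pools those states per document and maps them to `nr_class`
# independent sigmoid probabilities, i.e. a multi-label text classification head.
textcat_model = sigmoid_last_hidden(
    4,
    exclusive_classes=False,
    token_vector_width=768,  # must match the upstream hidden width
)
# scores, bp_scores = textcat_model.begin_update(inputs, drop=0.2)
# where `inputs` is whatever `get_last_hidden` expects (e.g. a batch of Docs).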