Example #1
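Builds a bidirectional language-model-style pretraining network: a shared embedding feeds separate left-to-right and right-to-left encoders, each capped by a layer-normalized Maxout and a zero-initialized Affine projection back to the word-vector width.
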
# Imports assume spaCy 2.x with Thinc 7.x; flatten and zero_init are
# spaCy-internal helpers.
from thinc.v2v import Model, Maxout, Affine
from thinc.api import chain, concatenate
from thinc.misc import LayerNorm as LN
from spacy._ml import flatten, zero_init


def create_pretraining_model(nlp, tok2vec, objective="basic"):
    """Define a network for the pretraining."""
    output_size = nlp.vocab.vectors.data.shape[1]
    # This is annoying, but the parser etc. have the flatten step after
    # the tok2vec. To load the weights in cleanly, we need to match
    # the shape of the models' components exactly. So what we call
    # "tok2vec" has to be the same set of processes as what the components do.
    with Model.define_operators({">>": chain, "|": concatenate}):

        l2r_model = (
            tok2vec.l2r
            >> flatten
            >> LN(Maxout(output_size, tok2vec.l2r.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )
        r2l_model = (
            tok2vec.r2l
            >> flatten
            >> LN(Maxout(output_size, tok2vec.r2l.nO, pieces=3))
            >> zero_init(Affine(output_size, drop_factor=0.0))
        )

        model = tok2vec.embed >> (l2r_model | r2l_model)

    model.tok2vec = tok2vec
    model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tok2vec.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
    tokvecs = tok2vec([nlp.make_doc("hello there"), nlp.make_doc("and hello")])
    print(tokvecs.shape)
    return model
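
All three examples lean on Thinc's operator overloading; below is a minimal, self-contained sketch of the pattern (assuming thinc 7.x, with arbitrary layer sizes):

import numpy
from thinc.v2v import Model, Affine
from thinc.api import chain, concatenate

# ">>" pipes one layer into the next; "|" concatenates outputs feature-wise.
with Model.define_operators({">>": chain, "|": concatenate}):
    model = Affine(16, 8) >> (Affine(4, 16) | Affine(4, 16))

X = numpy.zeros((2, 8), dtype="f")
print(model(X).shape)  # (2, 8): two 4-dim outputs concatenated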
Example #2
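Wires up a minimal encoder-decoder (autoencoder) network: one Affine layer compresses orig_width down to hidden_width, and a zero-initialized Affine decoder reconstructs the original width; the encoder is stored as its own attribute, presumably so it can be reused separately.
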
# Imports assume spaCy 2.x with Thinc 7.x.
from thinc.v2v import Model, Affine
from thinc.api import chain
from spacy._ml import zero_init, create_default_optimizer


def _build_network(self, orig_width, hidden_width):
    with Model.define_operators({">>": chain}):
        # Very simple encoder-decoder model: compress the input to
        # hidden_width, then reconstruct the original width.
        self.encoder = Affine(hidden_width, orig_width)
        self.model = self.encoder >> zero_init(
            Affine(orig_width, hidden_width, drop_factor=0.0)
        )
    self.sgd = create_default_optimizer(self.model.ops)
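
A hypothetical companion update step (not part of the snippet above), assuming it lives on the same object and vectors is a float32 array of shape (n, orig_width):

def _update(self, vectors):
    # begin_update returns the prediction plus a backprop callback;
    # passing the gradient and the optimizer applies the weight update.
    predicted, backprop = self.model.begin_update(vectors)
    backprop(predicted - vectors, sgd=self.sgd)  # least-squares reconstruction gradient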
Example #3
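Assembles a sigmoid classification head: it mean-pools the model's last hidden states, projects the pooled vector to nr_class with a zero-initialized Affine layer, and applies an element-wise logistic, so the classes need not be mutually exclusive.
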
# Imports assume spaCy 2.x with Thinc 7.x; get_last_hidden is assumed to
# be a project-local layer that extracts the transformer's last hidden
# states, and logistic is a spaCy-internal sigmoid layer.
from thinc.api import chain, flatten_add_lengths
from thinc.t2v import Pooling, mean_pool
from thinc.v2v import Affine
from spacy._ml import zero_init, logistic


def sigmoid_last_hidden(nr_class, *, exclusive_classes=False, **cfg):
    width = cfg["token_vector_width"]
    return chain(
        get_last_hidden,
        flatten_add_lengths,
        Pooling(mean_pool),
        zero_init(Affine(nr_class, width, drop_factor=0.0)),
        logistic,
    )
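
To sanity-check the tail of this chain without a transformer, dummy per-token arrays can stand in for get_last_hidden's output (a sketch, assuming thinc 7.x and spaCy 2.x):

import numpy
from thinc.api import chain, flatten_add_lengths
from thinc.t2v import Pooling, mean_pool
from thinc.v2v import Affine
from spacy._ml import zero_init, logistic

width, nr_class = 8, 3
tail = chain(
    flatten_add_lengths,
    Pooling(mean_pool),
    zero_init(Affine(nr_class, width, drop_factor=0.0)),
    logistic,
)
# Two "documents" of 5 and 2 tokens, each token a width-dim vector.
docs = [numpy.ones((5, width), dtype="f"), numpy.ones((2, width), dtype="f")]
print(tail(docs).shape)  # (2, 3): one sigmoid score per class per doc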