Example #1
import numpy
from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.neural._classes.model import Model


class extract_ngrams(Model):
    def begin_update(self, docs, drop=0.0):
        # Reconstructed opening of this excerpt: collect each doc's
        # unique n-gram keys and their occurrence counts.
        batch_keys = []
        batch_vals = []
        for doc in docs:
            unigrams = doc.to_array([self.attr])
            ngrams = [unigrams]
            for n in range(2, self.ngram_size + 1):
                ngrams.append(self.ops.ngrams(n, unigrams))
            keys = self.ops.xp.concatenate(ngrams)
            keys, vals = self.ops.xp.unique(keys, return_counts=True)
            batch_keys.append(keys)
            batch_vals.append(vals)
        # The dtype here matches what thinc is expecting -- which differs per
        # platform (by int definition). This should be fixed once the problem
        # is fixed on Thinc's side.
        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys],
                                   dtype=numpy.int_)
        batch_keys = self.ops.xp.concatenate(batch_keys)
        batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals),
                                      dtype="f")
        return (batch_keys, batch_vals, lengths), None


@describe.on_data(_set_dimensions_if_needed,
                  lambda model, X, y: model.init_weights(model))
@describe.attributes(
    nI=Dimension("Input size"),
    nF=Dimension("Number of features"),
    nO=Dimension("Output size"),
    nP=Dimension("Maxout pieces"),
    W=Synapses("Weights matrix", lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)),
    b=Biases("Bias vector", lambda obj: (obj.nO, obj.nP)),
    pad=Synapses(
        "Pad",
        lambda obj: (1, obj.nF, obj.nO, obj.nP),
        lambda M, ops: ops.normal_init(M, 1.0),
    ),
    d_W=Gradient("W"),
    d_pad=Gradient("pad"),
    d_b=Gradient("b"),
)
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nP = nP
        self.nI = nI
        self.nF = nF
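
The `(batch_keys, batch_vals, lengths)` triple packs a ragged batch into three flat arrays: all unique feature keys concatenated across docs, their counts as floats, and the number of unique keys per doc. A minimal sketch of that packing in plain numpy (standing in for Thinc's `ops`; the ID arrays are invented for illustration):

import numpy

docs_as_ids = [
    numpy.array([5, 5, 7], dtype="uint64"),     # hypothetical doc 1
    numpy.array([7, 9, 9, 9], dtype="uint64"),  # hypothetical doc 2
]
batch_keys, batch_vals = [], []
for ids in docs_as_ids:
    keys, vals = numpy.unique(ids, return_counts=True)  # unique IDs + counts
    batch_keys.append(keys)
    batch_vals.append(vals)
lengths = numpy.asarray([arr.shape[0] for arr in batch_keys],
                        dtype=numpy.int_)
keys = numpy.concatenate(batch_keys)              # [5 7 7 9]
vals = numpy.concatenate(batch_vals).astype("f")  # [2. 1. 1. 3.]
print(keys, vals, lengths)                        # lengths: [2 2]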
Example #2
import numpy
from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.api import layerize
from thinc.neural._classes.model import Model
from spacy.attrs import LOWER


@layerize
def _preprocess_doc_bigrams(docs, drop=0.0):
    # Reconstructed opening: extract the lower-case form IDs per doc.
    unigrams = [doc.to_array([LOWER]) for doc in docs]
    ops = Model.ops
    bigrams = [ops.ngrams(2, doc_unis) for doc_unis in unigrams]
    keys = [ops.xp.concatenate(feats) for feats in zip(unigrams, bigrams)]
    keys, vals = zip(*[ops.xp.unique(k, return_counts=True) for k in keys])
    # The dtype here matches what thinc is expecting -- which differs per
    # platform (by int definition). This should be fixed once the problem
    # is fixed on Thinc's side.
    lengths = ops.asarray([arr.shape[0] for arr in keys], dtype=numpy.int_)
    keys = ops.xp.concatenate(keys)
    vals = ops.asarray(ops.xp.concatenate(vals), dtype='f')
    return (keys, vals, lengths), None


@describe.on_data(_set_dimensions_if_needed,
    lambda model, X, y: model.init_weights(model))
@describe.attributes(
    nI=Dimension("Input size"),
    nF=Dimension("Number of features"),
    nO=Dimension("Output size"),
    nP=Dimension("Maxout pieces"),
    W=Synapses("Weights matrix",
        lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)),
    b=Biases("Bias vector",
        lambda obj: (obj.nO, obj.nP)),
    pad=Synapses("Pad",
        lambda obj: (1, obj.nF, obj.nO, obj.nP),
        lambda M, ops: ops.normal_init(M, 1.)),
    d_W=Gradient("W"),
    d_pad=Gradient("pad"),
    d_b=Gradient("b"))
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nP = nP
        self.nI = nI
        self.nF = nF
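
The recurring comment about `numpy.int_` refers to its platform dependence: it follows the C `long`, which under NumPy 1.x is 64-bit on most 64-bit Linux/macOS builds but 32-bit on Windows. A quick check of what it means on a given machine:

import numpy

# numpy.int_ tracks the C long, so its width varies by platform
# (NumPy 1.x: usually int64 on 64-bit Linux/macOS, int32 on Windows).
print(numpy.dtype(numpy.int_).name)      # e.g. 'int64' or 'int32'
print(numpy.dtype(numpy.int_).itemsize)  # bytes per element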
Example #3
import numpy
from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.api import layerize
from thinc.neural._classes.model import Model
from spacy.attrs import LOWER


@layerize
def _preprocess_doc(docs, drop=0.):
    keys = [doc.to_array([LOWER]) for doc in docs]
    ops = Model.ops
    # The dtype here matches what thinc is expecting -- which differs per
    # platform (by int definition). This should be fixed once the problem
    # is fixed on Thinc's side.
    lengths = ops.asarray([arr.shape[0] for arr in keys], dtype=numpy.int_)
    keys = ops.xp.concatenate(keys)
    vals = ops.allocate(keys.shape[0]) + 1
    return (keys, vals, lengths), None


@describe.on_data(_set_dimensions_if_needed,
                  lambda model, X, y: model.init_weights(model))
@describe.attributes(nI=Dimension("Input size"),
                     nF=Dimension("Number of features"),
                     nO=Dimension("Output size"),
                     nP=Dimension("Maxout pieces"),
                     W=Synapses("Weights matrix", lambda obj:
                                (obj.nF, obj.nO, obj.nP, obj.nI)),
                     b=Biases("Bias vector", lambda obj: (obj.nO, obj.nP)),
                     pad=Synapses("Pad", lambda obj:
                                  (1, obj.nF, obj.nO, obj.nP),
                                  lambda M, ops: ops.normal_init(M, 1.)),
                     d_W=Gradient("W"),
                     d_pad=Gradient("pad"),
                     d_b=Gradient("b"))
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nP = nP
        self.nI = nI
        self.nF = nF


def get_word_ids(docs, drop=0.0):
    # NOTE: function name assumed; header reconstructed to separate this
    # helper from the truncated class above.
    seqs = []
    for doc in docs:
        arr = numpy.zeros((len(doc) + 1, ), dtype='uint64')
        for token in doc:
            arr[token.i] = token.orth
        arr[len(doc)] = 0
        seqs.append(arr)
    return seqs, None
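
# For a two-token doc, the helper above yields e.g.
#     numpy.array([orth_id_1, orth_id_2, 0], dtype='uint64')
# i.e. one hash ID per token plus a trailing 0 sentinel.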


from thinc import describe
from thinc.describe import Dimension, Synapses, Gradient
from thinc.neural._classes.model import Model
from thinc.neural.ops import NumpyOps
from thinc.neural._lsuv import LSUVinit


@describe.on_data(LSUVinit)
@describe.attributes(
    nM=Dimension("Vector dimensions"),
    nO=Dimension("Size of output"),
    W=Synapses("A projection matrix, to change vector dimensionality",
               lambda obj: (obj.nO, obj.nM),
               lambda W, ops: ops.xavier_uniform_init(W)),
    d_W=Gradient("W"),
)
class SpacyVectors(Model):
    ops = NumpyOps()
    name = 'spacy-vectors'

    def __init__(self, nlp, nO):
        Model.__init__(self)
        self._id_map = {0: 0}
        self.nO = nO
        self.nM = nlp.vocab.vectors_length
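
The `Synapses` declaration gives `W` the shape `(nO, nM)` once both dimensions are known, so the layer can project an `nM`-dimensional spaCy vector to `nO` dimensions. A minimal sketch of that shape contract in plain numpy, with invented sizes:

import numpy

nO, nM = 128, 300                     # hypothetical output / vector sizes
W = numpy.zeros((nO, nM), dtype="f")  # shape from lambda obj: (obj.nO, obj.nM)
x = numpy.ones((nM,), dtype="f")      # one word vector of length nM
y = W @ x                             # projected vector of length nO
assert y.shape == (nO,)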