Example #1
def foreach_sentence(layer: Model, drop_factor: float = 1.0) -> Model:
    """Map a layer across sentences (assumes spaCy-esque .sents interface)"""
    def sentence_fwd(docs: List[Doc],
                     drop: Dropout = 0.0) -> Tuple[Acts, Callable]:
        if not all(doc.is_sentenced for doc in docs):
            # Without sentence boundaries, fall back to whole-doc spans.
            return layer.begin_update([d[:] for d in docs], drop=drop)
        sents = flatten_list([list(doc.sents) for doc in docs])
        words_per_doc = [len(d._.get(ATTRS.word_pieces)) for d in docs]
        words_per_sent = [len(s._.get(ATTRS.word_pieces)) for s in sents]
        sents_per_doc = [len(list(d.sents)) for d in docs]
        assert sum(words_per_doc) == sum(words_per_sent)
        acts, bp_acts = layer.begin_update(sents, drop=drop)
        # To go from "per sentence" activations to "per doc" activations, we
        # just have to tell it where the sequences end.
        acts.lh.lengths = words_per_doc
        acts.po.lengths = sents_per_doc

        def sentence_bwd(d_acts: Acts,
                         sgd: Optional[Optimizer] = None) -> None:
            assert isinstance(d_acts, Acts)
            # Translate back to the per-sentence activations
            if d_acts.has_lh:
                assert d_acts.lh.data.shape[0] == sum(d_acts.lh.lengths)
                assert d_acts.lh.lengths == words_per_doc
            d_acts.lh.lengths = words_per_sent
            d_acts.po.lengths = [1 for _ in words_per_sent]
            d_ids = bp_acts(d_acts, sgd=sgd)
            if not (d_ids is None or all(ds is None for ds in d_ids)):
                raise ValueError("Expected gradient of sentence to be None")
            return d_ids

        return acts, sentence_bwd

    return wrap(sentence_fwd, layer)
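
All of these combinators build on thinc v7's begin_update contract: a forward
function returns its output together with a callback that backpropagates
through it, and wrap() turns such a function into a Model. A minimal sketch of
that contract with a hypothetical scaling layer (numpy only, no thinc):

import numpy as np

def scale_begin_update(X, drop=0.0):
    """Toy layer obeying the begin_update contract: return (output, backprop)."""
    Y = X * 2.0

    def backprop(dY, sgd=None):
        # d(2 * X)/dX = 2, so the incoming gradient is scaled the same way.
        return dY * 2.0

    return Y, backprop

Y, bp = scale_begin_update(np.ones((3, 4), dtype="f"))
dX = bp(np.ones_like(Y))
assert float(dX.sum()) == 24.0
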
def without_length_batching(model: PyTT_Wrapper, _: Any) -> Model:
    Input = List[Array]
    Output = List[Activations]
    Backprop = Callable[[Output, Optional[Optimizer]], Optional[Input]]
    model_begin_update: Callable[[Array, Dropout], Tuple[Activations,
                                                         Backprop]]
    model_begin_update = model.begin_update

    def apply_model_padded(inputs: Input,
                           drop: Dropout = 0.0) -> Tuple[Output, Backprop]:
        activs, get_dX = model_begin_update(pad_batch(inputs), drop)
        outputs = [
            activs.get_slice(i, slice(0, len(seq)))
            for i, seq in enumerate(inputs)
        ]

        def backprop_batched(d_outputs, sgd=None):
            d_activs = pad_batch_activations(d_outputs)
            dX = get_dX(d_activs, sgd=sgd)
            if dX is not None:
                d_inputs = [
                    dX[i, :len(seq)] for i, seq in enumerate(d_outputs)
                ]
            else:
                d_inputs = None
            return d_inputs

        return outputs, backprop_batched

    return wrap(apply_model_padded, model)
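
pad_batch above is a utility from the same package that stacks variable-length
ID sequences into one rectangular batch. A rough numpy equivalent, shown only
to illustrate the assumed behaviour:

import numpy as np

def pad_batch_sketch(seqs, value=0):
    """Stack 1D sequences into a (batch, max_len) array, padding with `value`."""
    max_len = max(len(seq) for seq in seqs)
    out = np.full((len(seqs), max_len), value, dtype="i")
    for i, seq in enumerate(seqs):
        out[i, :len(seq)] = seq
    return out

assert pad_batch_sketch([np.array([1, 2, 3]), np.array([4])]).shape == (2, 3)
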
def without_length_batching(model: PyTT_Wrapper, _: Any) -> Model:
    Input = List[Array]
    Output = List[Activations]
    Backprop = Callable[[Output, Optional[Optimizer]], Optional[Input]]
    model_begin_update: Callable[[Array, Dropout], Tuple[Activations,
                                                         Backprop]]
    model_begin_update = model.begin_update

    def apply_model_padded(inputs: Input,
                           drop: Dropout = 0.0) -> Tuple[Output, Backprop]:
        activs, get_dX = model_begin_update(pad_batch(inputs), drop)
        last_hiddens = [
            activs.lh[i, :len(seq)] for i, seq in enumerate(inputs)
        ]
        outputs = [Activations(y, [], [], []) for y in last_hiddens]

        def backprop_batched(d_outputs, sgd=None):
            d_last_hiddens = [x.lh for x in d_outputs]
            dY = pad_batch(d_last_hiddens)
            dY = dY.reshape(len(d_outputs), -1, dY.shape[-1])
            d_activs = Activations(dY, [], [], [], is_grad=True)
            dX = get_dX(d_activs, sgd=sgd)
            if dX is not None:
                d_inputs = [
                    dX[i, :len(seq)] for i, seq in enumerate(d_outputs)
                ]
            else:
                d_inputs = None
            return d_inputs

        return outputs, backprop_batched

    return wrap(apply_model_padded, model)
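
Both variants share the same symmetry: unpad on the way out, repad on the way
back. The round trip can be checked with plain numpy:

import numpy as np

seqs = [np.array([1, 2, 3]), np.array([4])]
max_len = max(len(s) for s in seqs)
padded = np.zeros((len(seqs), max_len), dtype="i")
for i, s in enumerate(seqs):
    padded[i, :len(s)] = s
# Slice each row back to its true length, as apply_model_padded does.
unpadded = [padded[i, :len(s)] for i, s in enumerate(seqs)]
assert all(np.array_equal(a, b) for a, b in zip(seqs, unpadded))
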
def apply_layers(*layers):
    """Take a sequence of input layers, and expect input tuples. Apply
    layers[0] to inputs[1], layers[1] to inputs[1], etc.
    """
    def apply_layers_forward(inputs, drop=0.):
        assert len(inputs) == len(layers), (len(inputs), len(layers))
        outputs = []
        callbacks = []
        for layer, input_ in zip(layers, inputs):
            output, callback = layer.begin_update(input_, drop=drop)
            outputs.append(output)
            callbacks.append(callback)

        def apply_layers_backward(d_outputs, sgd=None):
            d_inputs = []
            for callback, d_output in zip(callbacks, d_outputs):
                if callback is None:
                    d_inputs.append(None)
                else:
                    d_inputs.append(callback(d_output, sgd=sgd))
            return d_inputs

        return tuple(outputs), apply_layers_backward

    return wrap(apply_layers_forward, *layers)
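
A small trace of the pairing logic with hypothetical layers: each layer sees
only its own element of the input tuple.

class ToyLayer:
    """Stand-in exposing the begin_update interface."""
    def __init__(self, factor):
        self.factor = factor

    def begin_update(self, X, drop=0.0):
        def backprop(dY, sgd=None):
            return dY * self.factor
        return X * self.factor, backprop

layers = [ToyLayer(2.0), ToyLayer(3.0)]
inputs = (1.0, 10.0)
outputs = [layer.begin_update(X)[0] for layer, X in zip(layers, inputs)]
assert tuple(outputs) == (2.0, 30.0)
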
def truncate_long_inputs(model, max_len):
    """Truncate inputs on the way into a model, and restore their shape on
    the way out.
    """
    def with_truncate_forward(X, drop=0.0):
        # X is expected to be (batch, seq_len): dim 0 is the batch, dim 1 the sequence length
        if X.shape[1] < max_len:
            return model.begin_update(X, drop=drop)
        X_short = X[:, :max_len]
        Y_short, get_dX_short = model.begin_update(X_short, drop=drop)
        short_lh = Y_short.lh
        Y = model.ops.allocate((short_lh.shape[0], X.shape[1]) +
                               short_lh.shape[2:])
        Y[:, :max_len] = short_lh
        outputs = Activations(Y, [], [], [])

        def with_truncate_backward(dY, sgd=None):
            dY_short = dY.get_slice(slice(0, None), slice(0, max_len))
            dX_short = get_dX_short(dY_short, sgd=sgd)
            if dX_short is None:
                return None
            dX = model.ops.allocate((dX_short.shape[0], dY.shape[1]) +
                                    dY_short.shape[2:])
            dX[:, :max_len] = dX_short
            return dX

        return outputs, with_truncate_backward

    return wrap(with_truncate_forward, model)
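
The allocate-then-copy step is the heart of the restore: every position past
max_len is zero-filled, which amounts to saying the truncated positions have
no activation and receive no gradient. In numpy terms:

import numpy as np

max_len, full_len = 4, 10
Y_short = np.ones((2, max_len, 8), dtype="f")  # output on the truncated input
Y = np.zeros((Y_short.shape[0], full_len, Y_short.shape[2]), dtype="f")
Y[:, :max_len] = Y_short
assert float(Y[:, max_len:].sum()) == 0.0
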
def truncate_long_inputs(model: PyTT_Wrapper, max_len: int) -> PyTT_Wrapper:
    """Truncate inputs on the way into a model, and restore their shape on
    the way out.
    """
    def with_truncate_forward(X: Array,
                              drop: Dropout = 0.0
                              ) -> Tuple[Activations, Callable]:
        # X is expected to be (batch, seq_len): dim 0 is the batch, dim 1 the sequence length
        if X.shape[1] < max_len:
            return model.begin_update(X, drop=drop)
        X_short = X[:, :max_len]
        Y_short, get_dX_short = model.begin_update(X_short, drop=drop)
        outputs = pad_batch_activations([Y_short], to=X.shape[1])

        def with_truncate_backward(dY, sgd=None):
            dY_short = dY.get_slice(slice(0, None), slice(0, max_len))
            dX_short = get_dX_short(dY_short, sgd=sgd)
            if dX_short is None:
                return None
            dX = model.ops.allocate((dX_short.shape[0], dY.shape[1]) +
                                    dY_short.shape[2:])
            dX[:, :max_len] = dX_short
            return dX

        return outputs, with_truncate_backward

    return wrap(with_truncate_forward, model)
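
The typed variant delegates the zero-restoring to pad_batch_activations with a
target length. A hypothetical stand-in for that helper, to make the assumed
padding behaviour concrete:

import numpy as np

def pad_to_length_sketch(arr, to, axis=1):
    """Hypothetical pad_batch_activations(..., to=...): zero-pad along `axis`."""
    widths = [(0, 0)] * arr.ndim
    widths[axis] = (0, to - arr.shape[axis])
    return np.pad(arr, widths)

assert pad_to_length_sketch(np.ones((2, 4, 8)), to=10).shape == (2, 10, 8)
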
Example #7
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        if drop is not None:
            drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
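
On per-token features the result is an hstack along the last axis: two toy
encoders producing 4- and 6-dimensional vectors for the same tokens yield
10-dimensional concatenated vectors.

import numpy as np

tokens = 5
f_out = np.random.rand(tokens, 4).astype("f")
g_out = np.random.rand(tokens, 6).astype("f")
assert np.hstack([f_out, g_out]).shape == (tokens, 10)
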
Example #9
def block_gradients(model):
    from thinc.api import wrap

    def forward(X, drop=0.0):
        Y, _ = model.begin_update(X, drop=drop)
        return Y, None

    return wrap(forward, model)
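
Returning None in place of the backprop callback is what blocks the gradient:
callers that respect the contract skip backpropagation for that branch,
exactly as apply_layers above does. A toy trace:

def forward_no_grad(X, drop=0.0):
    # Pretend this wraps a pretrained sub-network we want to freeze.
    return X * 2.0, None

Y, backprop = forward_no_grad(3.0)
# Callers must check for a missing callback before backpropagating.
d_input = backprop(1.0) if backprop is not None else None
assert Y == 6.0 and d_input is None
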
Example #10
def concatenate_ragged(*layers):
    model = concatenate(*[chain(layer, getitem(0)) for layer in layers])

    def begin_update(X_lengths, drop=0.):
        X, lengths = X_lengths
        Y, get_dX = model.begin_update(X_lengths, drop=drop)
        return (Y, lengths), get_dX

    output = wrap(begin_update, model)
    return output
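
The (X, lengths) pair is a ragged batch: one flat array holding all rows, plus
per-sequence lengths. The wrapper threads lengths through unchanged, so the
output keeps the same sequence boundaries as the input:

import numpy as np

# Three sequences of lengths 2, 1 and 3, flattened row-wise.
data = np.arange(12, dtype="f").reshape(6, 2)
lengths = np.array([2, 1, 3], dtype="i")
assert data.shape[0] == int(lengths.sum())
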
def with_length_batching(model: PyTT_Wrapper, max_words: int) -> Model:
    """Wrapper that applies a model to variable-length sequences by first batching
    and padding the sequences. This allows us to group similarly-lengthed sequences
    together, making the padding less wasteful. If min_batch==1, no padding will
    be necessary.
    """
    if max_words < 1:
        return without_length_batching(model, max_words)

    Input = List[Array]
    Output = List[Activations]
    Backprop = Callable[[Output, Optional[Optimizer]], Optional[Input]]

    def apply_model_to_batches(
            inputs: List[Array],
            drop: Dropout = 0.0) -> Tuple[List[Activations], Backprop]:
        batches: List[List[int]] = batch_by_length(inputs, max_words)
        # Initialize this, so we can place the outputs back in order.
        unbatched: List[Optional[Activations]] = [None for _ in inputs]
        backprops = []
        for indices in batches:
            X: Array = pad_batch([inputs[i] for i in indices])
            activs, get_dX = model.begin_update(X, drop=drop)
            backprops.append(get_dX)
            for i, j in enumerate(indices):
                unbatched[j] = activs.get_slice(i, slice(0, len(inputs[j])))
        outputs: List[Activations] = [y for y in unbatched if y is not None]
        assert len(outputs) == len(unbatched)

        def backprop_batched(d_outputs: Output,
                             sgd: Optional[Optimizer] = None) -> Optional[Input]:
            d_inputs: List[Optional[Array]] = [None for _ in inputs]
            for indices, get_dX in zip(batches, backprops):
                d_activs = pad_batch_activations(
                    [d_outputs[i] for i in indices])
                dX = get_dX(d_activs, sgd=sgd)
                if dX is not None:
                    for i, j in enumerate(indices):
                        # As above, put things back in order, unpad.
                        d_inputs[j] = dX[i, :len(d_outputs[j])]
            not_none = [x for x in d_inputs if x is not None]
            if len(not_none) == 0:
                return None
            else:
                assert len(not_none) == len(d_inputs)
                return not_none

        return outputs, backprop_batched

    return wrap(apply_model_to_batches, model)
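
batch_by_length is another utility from the package: it groups indices of
similar-length sequences under a word budget, so each padded batch wastes
little space. A hypothetical greedy version of the idea:

def batch_by_length_sketch(seqs, max_words):
    """Sort indices by sequence length, then fill batches up to max_words."""
    indices = sorted(range(len(seqs)), key=lambda i: len(seqs[i]))
    batches, current, words = [], [], 0
    for i in indices:
        n = len(seqs[i])
        if current and words + n > max_words:
            batches.append(current)
            current, words = [], 0
        current.append(i)
        words += n
    if current:
        batches.append(current)
    return batches

assert batch_by_length_sketch(["ab", "abcd", "a"], max_words=5) == [[2, 0], [1]]
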
Example #12
def with_cpu(ops, model):
    """Wrap a model that should run on CPU, transferring inputs and outputs
    as necessary."""
    model.to_cpu()

    def with_cpu_forward(inputs, drop=0.0):
        cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
        gpu_outputs = _to_device(ops, cpu_outputs)

        def with_cpu_backprop(d_outputs, sgd=None):
            cpu_d_outputs = _to_cpu(d_outputs)
            return backprop(cpu_d_outputs, sgd=sgd)

        return gpu_outputs, with_cpu_backprop

    return wrap(with_cpu_forward, model)
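
_to_cpu and _to_device are private helpers from the same module. Conceptually
they walk the possibly nested inputs and move arrays between devices; a rough
sketch of the CPU direction, assuming cupy-style arrays that expose .get():

def _to_cpu_sketch(X):
    """Recursively move arrays to CPU; cupy arrays expose .get() for this."""
    if hasattr(X, "get"):  # cupy ndarray
        return X.get()
    if isinstance(X, (list, tuple)):
        return type(X)(_to_cpu_sketch(x) for x in X)
    return X  # numpy arrays and scalars pass through unchanged
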
Example #14
def masked_language_model(vocab, model, mask_prob=0.15):
    """Convert a model into a BERT-style masked language model"""

    random_words = _RandomWords(vocab)

    def mlm_forward(docs, drop=0.0):
        mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob)
        mask = model.ops.asarray(mask).reshape((mask.shape[0], 1))
        output, backprop = model.begin_update(docs, drop=drop)

        def mlm_backward(d_output, sgd=None):
            d_output *= 1 - mask
            return backprop(d_output, sgd=sgd)

        return output, mlm_backward

    return wrap(mlm_forward, model)
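
Assuming _apply_mask follows the usual convention (1 for tokens left intact, 0
for corrupted tokens), the line d_output *= 1 - mask zeroes the gradient
everywhere except the masked positions, so the model only learns from the
tokens it had to reconstruct:

import numpy as np

mask = np.array([[1], [0], [1]], dtype="f")  # 1 = intact, 0 = corrupted
d_output = np.ones((3, 4), dtype="f")
d_output *= 1 - mask
# Only the corrupted token's row still carries gradient.
assert float(d_output.sum()) == 4.0
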
Example #16
def reapply(layer, n_times):
    def reapply_fwd(X, drop=0.):
        backprops = []
        for i in range(n_times):
            Y, backprop = layer.begin_update(X, drop=drop)
            X = Y
            backprops.append(backprop)

        def reapply_bwd(dY, sgd=None):
            dX = None
            for backprop in reversed(backprops):
                dY = backprop(dY, sgd=sgd)
                if dX is None:
                    dX = dY
                else:
                    dX += dY
            return dX

        return Y, reapply_bwd
    return wrap(reapply_fwd, layer)
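
A toy trace of the forward chaining with a hypothetical doubling layer: after
three applications the output is 2**3 times the input, and one backprop
callback is stacked per application:

class Doubler:
    def begin_update(self, X, drop=0.0):
        return X * 2.0, lambda dY, sgd=None: dY * 2.0

layer = Doubler()
X, backprops = 1.0, []
for _ in range(3):
    X, bp = layer.begin_update(X)
    backprops.append(bp)
assert X == 8.0 and len(backprops) == 3
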
def foreach_sentence(layer: Model, drop_factor: float = 1.0) -> Model:
    """Map a layer across sentences (assumes spaCy-esque .sents interface)"""
    ops = layer.ops

    Output = List[Activations]
    Backprop = Callable[[Output, Optional[Optimizer]], None]

    def sentence_fwd(docs: List[Doc],
                     drop: Dropout = 0.0) -> Tuple[Output, Backprop]:
        sents: List[Span]
        sent_acts: List[Activations]
        bp_sent_acts: Callable[..., Optional[List[None]]]
        nested: List[List[Activations]]
        doc_sent_lengths: List[List[int]]
        doc_acts: List[Activations]

        sents = flatten_list([list(doc.sents) for doc in docs])
        sent_acts, bp_sent_acts = layer.begin_update(sents, drop=drop)
        nested = unflatten_list(sent_acts,
                                [len(list(doc.sents)) for doc in docs])
        doc_sent_lengths = [[len(sa) for sa in doc_sa] for doc_sa in nested]
        doc_acts = [Activations.join(doc_sa) for doc_sa in nested]
        assert len(docs) == len(doc_acts)

        def sentence_bwd(d_doc_acts: Output,
                         sgd: Optional[Optimizer] = None) -> None:
            d_nested = [
                d_doc_acts[i].split(ops, doc_sent_lengths[i])
                for i in range(len(d_doc_acts))
            ]
            d_sent_acts = flatten_list(d_nested)
            d_ids = bp_sent_acts(d_sent_acts, sgd=sgd)
            if not (d_ids is None or all(ds is None for ds in d_ids)):
                raise ValueError("Expected gradient of sentence to be None")

        return doc_acts, sentence_bwd

    model = wrap(sentence_fwd, layer)
    return model
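
This version leans on flatten_list and unflatten_list to regroup per-sentence
activations by document. Hypothetical equivalents of those helpers:

def flatten_list_sketch(nested):
    """[[a, b], [c]] -> [a, b, c]"""
    return [item for sub in nested for item in sub]

def unflatten_list_sketch(flat, lengths):
    """Invert the flattening, given the size of each group."""
    out, start = [], 0
    for n in lengths:
        out.append(flat[start:start + n])
        start += n
    return out

nested = [[1, 2], [3]]
assert unflatten_list_sketch(flatten_list_sketch(nested), [2, 1]) == nested
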
Example #19
def concatenate_lists(*layers, **kwargs):  # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get("drop_factor", 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.0):
        if drop is not None:
            drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype="i")
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
Example #20
def with_length_batching(model: TransformersWrapper,
                         max_words: int) -> TransformersWrapper:
    ops = model.ops

    def apply_model_to_batches(inputs: RaggedArray,
                               drop: Dropout = 0.0) -> Tuple[Acts, Callable]:
        if max_words == 0 or inputs.data.shape[0] < max_words:
            return model.begin_update(inputs, drop=drop)
        Xs: List[Array] = ops.unflatten(inputs.data, inputs.lengths)
        outputs = None
        backprops = []
        index2rows = {}
        start = 0
        # Map each index to the slice of rows in the flattened data it refers to.
        for i, length in enumerate(inputs.lengths):
            index2rows[i] = [start + j for j in range(length)]
            start += length
        total_rows = sum(inputs.lengths)
        for indices in batch_by_length(Xs, max_words):
            X: Array = inputs.xp.concatenate([Xs[i] for i in indices])
            lengths = [inputs.lengths[i] for i in indices]
            Y, get_dX = model.begin_update(RaggedArray(X, lengths), drop=drop)
            if outputs is None:
                lh_shape = (total_rows, Y.lh.data.shape[-1])
                po_shape = (len(inputs.lengths), Y.po.data.shape[-1])
                outputs = Acts(
                    RaggedArray(Y.lh.xp.zeros(lh_shape, dtype="f"),
                                inputs.lengths),
                    RaggedArray(Y.po.xp.zeros(po_shape, dtype="f"),
                                [1 for _ in inputs.lengths]),
                )
            lh_rows = []
            po_rows = []
            for index in indices:
                lh_rows.extend(index2rows[index])
                po_rows.append(index)
            lh_rows = outputs.xp.array(lh_rows, dtype="i")
            po_rows = outputs.xp.array(po_rows, dtype="i")
            outputs.lh.data[lh_rows] = Y.lh.data
            if outputs.has_po and po_rows.size:
                outputs.po.data[po_rows] = Y.po.data
            backprops.append((get_dX, lh_rows, po_rows, lengths))

        def backprop_batched(d_outputs: Acts, sgd: Optional[Optimizer] = None):
            for get_dX, lh_rows, po_rows, lengths in backprops:
                if d_outputs.has_lh:
                    d_lh = d_outputs.lh.data[lh_rows]
                    lh_lengths = lengths
                else:
                    d_lh = d_outputs.lh.data
                    lh_lengths = []
                if d_outputs.has_po:
                    d_po = d_outputs.po.data[po_rows]
                    po_lengths = [1 for _ in lengths]
                else:
                    d_po = d_outputs.po.data
                    po_lengths = []
                dY = Acts(RaggedArray(d_lh, lh_lengths),
                          RaggedArray(d_po, po_lengths))
                dX = get_dX(dY, sgd=sgd)
                assert dX is None
            return None

        return outputs, backprop_batched

    return wrap(apply_model_to_batches, model)
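
RaggedArray pairs a flat data array with per-sequence lengths; the index2rows
bookkeeping above maps each sequence index to its rows in that flat array. A
minimal sketch of the structure, as an assumption about the real class:

import numpy as np
from dataclasses import dataclass
from typing import List

@dataclass
class RaggedArraySketch:
    """Hypothetical minimal RaggedArray: flat (rows, width) data + lengths."""
    data: np.ndarray
    lengths: List[int]

    @property
    def xp(self):
        return np  # the real class returns numpy or cupy as appropriate

ragged = RaggedArraySketch(np.zeros((5, 8), dtype="f"), [2, 3])
assert ragged.data.shape[0] == sum(ragged.lengths)
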