def test_extract_spans_forward_backward():
    """Check output shapes of ``extract_spans`` and that backprop restores the input layout."""
    model = extract_spans().initialize()
    ops = model.ops

    # Two sequences (5 and 10 rows) stored in a single 15 x 4 array.
    x_data = ops.alloc2f(15, 4)
    x_lengths = ops.asarray([5, 10], dtype="i")
    X = Ragged(x_data, x_lengths)

    # Two spans for the first sequence, one for the second.
    span_offsets = ops.asarray([[0, 3], [2, 3], [5, 7]], dtype="i")
    spans_per_seq = ops.asarray([2, 1], dtype="i")
    spans = Ragged(span_offsets, spans_per_seq)

    Y, backprop = model.begin_update((X, spans))
    # Span sizes are 3, 1 and 2 rows, i.e. 6 extracted rows in total.
    assert list(Y.lengths) == [3, 1, 2]
    assert Y.dataXd.shape == (6, 4)

    dX, spans2 = backprop(Y)
    # The spans are handed back untouched; the gradient mirrors X's layout.
    assert spans2 is spans
    assert dX.dataXd.shape == X.dataXd.shape
    assert list(dX.lengths) == list(X.lengths)
def test_extract_spans_span_indices():
    """Check the flat row indices that ``_get_span_indices`` produces for a small batch."""
    model = extract_spans().initialize()
    ops = model.ops

    span_offsets = ops.asarray([[0, 3], [2, 3], [5, 7]], dtype="i")
    spans_per_seq = ops.asarray([2, 1], dtype="i")
    spans = Ragged(span_offsets, spans_per_seq)
    x_lengths = ops.asarray([5, 10], dtype="i")

    indices = _get_span_indices(ops, spans, x_lengths)

    # The third span belongs to the second sequence; its expected indices
    # (10, 11) are its offsets (5, 6) shifted by the first sequence's length.
    expected = [0, 1, 2, 2, 10, 11]
    assert list(indices) == expected
def test_spancat_model_forward_backward(nO=5):
    """Run a spancat model forward and backward and check the output shape.

    nO: Output width passed to the ``Relu`` layer of the scorer.
    """
    tok2vec = build_Tok2Vec_model(**get_tok2vec_kwargs())
    docs = get_docs()

    # Two candidate spans per doc: tokens [0, 2) and tokens [1, 4).
    offsets = []
    lengths = []
    for doc in docs:
        for span in (doc[:2], doc[1:4]):
            offsets.append([span.start, span.end])
        lengths.append(2)

    ops = tok2vec.ops
    spans = Ragged(ops.asarray(offsets, dtype="i"), ops.asarray(lengths, dtype="i"))

    reducer = reduce_mean()
    scorer = chain(Relu(nO=nO), Logistic())
    model = build_spancat_model(tok2vec, reducer, scorer)
    model = model.initialize(X=(docs, spans))

    Y, backprop = model((docs, spans), is_train=True)
    # One output row per candidate span, nO columns.
    n_spans = spans.dataXd.shape[0]
    assert Y.shape == (n_spans, nO)
    backprop(Y)
def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callable]:
    """Build a token window around the sentence of every entity in every doc.

    For each entity in ``doc.ents`` the window starts at the first token of
    the sentence ``n_sents`` sentences before the entity's sentence (or the
    first sentence of the doc) and ends at the last token of the sentence
    ``n_sents`` sentences after it (or the last sentence of the doc).

    model: The layer; ``model.ops`` and ``model.attrs["n_sents"]`` are read.
    docs: The batch of docs. A doc whose ``sents`` raises ``ValueError`` is
        modified in place so that only its first token starts a sentence.
    is_train: Unused.
    RETURNS: A ``Ragged`` holding one ``(start_token, end_token)`` row per
        entity, with one length entry per doc, plus a backprop callback that
        ignores its input and returns an empty list.
    RAISES: ``RuntimeError`` (``Errors.E030``) when accessing ``ent.sent``
        raises ``AttributeError``.
    """
    ops = model.ops
    n_sents = model.attrs["n_sents"]
    spans = []  # (start_token, end_token) pairs of all docs, in order
    counts = []  # number of pairs contributed by each doc
    for doc in docs:
        try:
            sentences = list(doc.sents)
        except ValueError:
            # no sentence info, normal in initialization
            for tok in doc:
                tok.is_sent_start = tok.i == 0
            sentences = [doc[:]]
        n_before = len(spans)
        for ent in doc.ents:
            try:
                # find the sentence in the list of sentences.
                sent_index = sentences.index(ent.sent)
            except AttributeError:
                # ent.sent is not available: surface a user-friendly error
                raise RuntimeError(Errors.E030) from None
            # get n previous sentences, if there are any
            start_sentence = max(0, sent_index - n_sents)
            # get n posterior sentences, or as many < n as there are
            end_sentence = min(len(sentences) - 1, sent_index + n_sents)
            # save token positions for extraction
            spans.append(
                (sentences[start_sentence].start, sentences[end_sentence].end)
            )
        counts.append(len(spans) - n_before)
    candlens = ops.asarray1i(counts)
    # Convert everything at once and force two columns. Converting each doc
    # separately and concatenating breaks when a doc has no entities (an empty
    # list carries no column information) or when the batch itself is empty.
    candidates = ops.asarray2i(spans).reshape((-1, 2))
    outputs = Ragged(candidates, candlens)
    # because this is just rearranging docs, the backprop does nothing
    return outputs, lambda x: []
def ragged_data(ops, list_data):
    """Pack a list of arrays into a ``Ragged``.

    ops: Backend used to flatten the items (or to allocate the empty array).
    list_data: The items; the length of each one becomes a ``Ragged`` length.
    RETURNS: A ``Ragged`` over the flattened items, or over an empty 0 x 0
        float array when ``list_data`` is empty.
    """
    lengths = numpy.array(list(map(len, list_data)), dtype="i")
    # Nothing to flatten for an empty input, so allocate an empty array instead.
    data = ops.flatten(list_data) if list_data else ops.alloc2f(0, 0)
    return Ragged(data, lengths)
import pytest
import numpy
from thinc.api import get_width, Ragged, Padded
from thinc.util import get_array_module, is_numpy_array, to_categorical
from thinc.util import convert_recursive
from thinc.types import ArgsKwargs


@pytest.mark.parametrize(
    ("obj", "width"),
    [
        # 4-d array: the expected width equals the size of the last axis.
        (numpy.zeros((1, 2, 3, 4)), 4),
        # 0-d array: expected width is 0.
        (numpy.array(1), 0),
        # 1-d integer array: expected width is 3, one more than the largest value.
        (numpy.array([1, 2]), 3),
        # List of arrays: the expected width matches the first item's last axis.
        ([numpy.zeros((1, 2)), numpy.zeros(1)], 2),
        # Ragged around a (1, 2) array.
        (Ragged(numpy.zeros((1, 2)), numpy.zeros(1)), 2),
        # Padded around a (2, 1, 2) array.
        (
            Padded(
                numpy.zeros((2, 1, 2)),
                numpy.zeros(2),
                numpy.array([1, 0]),
                numpy.array([0, 1]),
            ),
            2,
        ),
        # Empty list: expected width is 0.
        ([], 0),
    ],
)
def test_get_width(obj, width):
    """``get_width`` returns the expected width for each supported input kind."""
    assert get_width(obj) == width
import pytest
import numpy
from thinc.api import get_width, Ragged, Padded
from thinc.util import get_array_module, is_numpy_array, to_categorical
from thinc.util import convert_recursive
from thinc.types import ArgsKwargs


@pytest.mark.parametrize(
    ("obj", "width"),
    [
        # 4-d array: the expected width equals the size of the last axis.
        (numpy.zeros((1, 2, 3, 4)), 4),
        # 0-d array: expected width is 0.
        (numpy.array(1), 0),
        # 1-d integer array: expected width is 3, one more than the largest value.
        (numpy.array([1, 2]), 3),
        # List of arrays: the expected width matches the first item's last axis.
        ([numpy.zeros((1, 2)), numpy.zeros(1)], 2),
        # Ragged around a (1, 2) array.
        (Ragged(numpy.zeros((1, 2)), numpy.zeros(1)), 2),  # type:ignore
        # Padded around a (2, 1, 2) array.
        (
            Padded(
                numpy.zeros((2, 1, 2)),  # type:ignore
                numpy.zeros(2),  # type:ignore
                numpy.array([1, 0]),  # type:ignore
                numpy.array([0, 1]),  # type:ignore
            ),
            2,
        ),
        # Empty list: expected width is 0.
        ([], 0),
    ],
)
def test_get_width(obj, width):
    """``get_width`` returns the expected width for each supported input kind."""
    assert get_width(obj) == width
def assert_ragged_data_match(X, Y):
    """Wrap two argument sequences in ``Ragged`` and compare the results.

    X, Y: Iterables unpacked as the positional arguments of ``Ragged``.
    RETURNS: Whatever ``assert_raggeds_match`` returns for the two objects.
    """
    ragged_x = Ragged(*X)
    ragged_y = Ragged(*Y)
    return assert_raggeds_match(ragged_x, ragged_y)
def zero_suggester(docs, *, ops=None):
    """Suggest no spans at all for any of the docs.

    docs: The batch; only its length is used.
    ops: Backend whose array module builds the result. Defaults to the value
        returned by ``get_current_ops()``.
    RETURNS: A ``Ragged`` with an empty 0 x 0 integer data array and one zero
        length per doc.
    """
    ops = get_current_ops() if ops is None else ops
    xp = ops.xp
    no_spans = xp.zeros((0, 0), dtype="i")
    lengths = xp.zeros((len(docs),), dtype="i")
    return Ragged(no_spans, lengths)