def test_lstm_forward_training_fuzz(ops, args):
    params, H0, C0, X, size_at_t = args
    reference_ops = Ops()
    reference = reference_ops.lstm_forward_training(params, H0, C0, X, size_at_t)
    Y, fwd_state = ops.lstm_forward_training(params, H0, C0, X, size_at_t)
    assert_allclose(fwd_state[2], reference[1][2], atol=1e-4, rtol=1e-3)
    assert_allclose(fwd_state[1], reference[1][1], atol=1e-4, rtol=1e-3)
    assert_allclose(Y, reference[0], atol=1e-4, rtol=1e-3)
def test_lstm_forward_training(ops, depth, dirs, nO, batch_size, nI):
    reference_ops = Ops()
    params, H0, C0, X, size_at_t = get_lstm_args(depth, dirs, nO, batch_size, nI)
    reference = reference_ops.lstm_forward_training(params, H0, C0, X, size_at_t)
    Y, fwd_state = ops.lstm_forward_training(params, H0, C0, X, size_at_t)
    assert_allclose(fwd_state[2], reference[1][2], atol=1e-4, rtol=1e-3)
    assert_allclose(fwd_state[1], reference[1][1], atol=1e-4, rtol=1e-3)
    assert_allclose(Y, reference[0], atol=1e-4, rtol=1e-3)
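
# Both tests above compare an Ops implementation against the pure-NumPy base
# Ops. Since lstm_forward_training returns (Y, fwd_state), reference[0] is the
# sequence output and reference[1] is the intermediate state kept for the
# backward pass; the assertions check element-wise agreement within
# atol=1e-4, rtol=1e-3.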
def test_backprop_seq2col_window_one(ops, X):
    # backprop_seq2col with nW=1 expects the concatenated three-window layout,
    # so skip inputs whose column count is not divisible by three.
    if X.shape[1] % 3:
        return None
    X = ops.asarray(X)
    # Keep values small so the tolerances in the final comparison stay meaningful.
    if ops.xp.abs(X).max() >= 30:
        return None
    base_ops = Ops()
    base_ops.xp = ops.xp
    target = base_ops.backprop_seq2col(X, nW=1)
    predicted = ops.backprop_seq2col(X, nW=1)
    # Print any rows that disagree before the assertion fires, to ease debugging.
    for row in range(target.shape[0]):
        diff = target[row].sum() - predicted[row].sum()
        if diff < -0.1 or diff > 0.1:
            print(row, diff)
            print(target[row])
            print(predicted[row])
    ops.xp.testing.assert_allclose(target, predicted, atol=0.001, rtol=0.001)
def test_seq2col_window_one(ops, X):
    X = ops.asarray(X)
    base_ops = Ops()
    base_ops.xp = ops.xp
    baseX = base_ops.alloc(X.shape) + X
    target = base_ops.seq2col(base_ops.asarray(baseX), nW=1)
    predicted = ops.seq2col(X, nW=1)
    ops.xp.testing.assert_allclose(target, predicted, atol=0.001, rtol=0.001)
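
# For reference: seq2col with nW=1 concatenates each row with its immediate
# neighbours, zero-padding at the sequence edges, so the output has three
# times as many columns as the input. A minimal pure-numpy sketch of the
# expected semantics (a hypothetical helper for illustration, not thinc's
# actual implementation):
import numpy


def seq2col_window_one_reference(X: numpy.ndarray) -> numpy.ndarray:
    nr_row, nr_col = X.shape
    out = numpy.zeros((nr_row, nr_col * 3), dtype=X.dtype)
    out[1:, :nr_col] = X[:-1]        # left neighbour (row i-1); zeros for row 0
    out[:, nr_col : nr_col * 2] = X  # the row itself
    out[:-1, nr_col * 2 :] = X[1:]   # right neighbour (row i+1); zeros for last row
    return out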
from typing import Callable, Tuple, cast

from thinc.api import Ops, Ragged
from thinc.types import Floats2d, Ints1d


def apply_alignment(ops: Ops, align: Ragged, X: Floats2d) -> Tuple[Ragged, Callable]:
    """Align wordpiece data (X) to match tokens, and provide a callback to
    reverse it.

    This function returns a Ragged array, which represents the fact that one
    token may be aligned against multiple wordpieces. It's a nested list,
    concatenated with a lengths array to indicate the nested structure.

    The alignment is also a Ragged array, where the lengths indicate how many
    wordpieces each token is aligned against. The output ragged therefore has
    the same lengths as the alignment ragged, which means the output data also
    has the same number of data rows as the alignment. The size of the lengths
    array indicates the number of tokens in the batch.

    The actual alignment is a simple indexing operation:

        for i, index in enumerate(align.data):
            Y[i] = X[index]

    This is vectorized via numpy advanced indexing:

        Y = X[align.data]

    The inverse operation, for the backward pass, uses the 'scatter_add' op,
    because one wordpiece may be aligned against multiple tokens. So we need:

        for i, index in enumerate(align.data):
            X[index] += Y[i]

    The addition wouldn't occur if we simply did `X[index] = Y`, so we use
    the scatter_add op.
    """
    if not align.lengths.sum():
        return _apply_empty_alignment(ops, align, X)
    shape = X.shape
    indices = cast(Ints1d, align.dataXd)
    Y = Ragged(X[indices], cast(Ints1d, ops.asarray(align.lengths)))

    def backprop_apply_alignment(dY: Ragged) -> Floats2d:
        assert dY.data.shape[0] == indices.shape[0]
        dX = ops.alloc2f(*shape)
        ops.scatter_add(dX, indices, cast(Floats2d, dY.dataXd))
        return dX

    return Y, backprop_apply_alignment
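
# A minimal usage sketch, assuming apply_alignment (above) is in scope. The
# shapes and values are illustrative: three wordpiece rows aligned to two
# tokens, where the second token spans two wordpieces.
import numpy
from thinc.api import NumpyOps, Ragged

ops = NumpyOps()
X = numpy.asarray([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], dtype="float32")
# Token 0 -> wordpiece 0; token 1 -> wordpieces 1 and 2.
align = Ragged(
    numpy.asarray([0, 1, 2], dtype="i"),  # wordpiece index for each pair
    numpy.asarray([1, 2], dtype="i"),     # wordpieces per token
)
Y, backprop = apply_alignment(ops, align, X)
assert Y.data.shape == (3, 2)  # one data row per alignment pair
dX = backprop(Y)               # scatter_add accumulates rows back to X's shape
assert dX.shape == X.shape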
def test_get_ops():
    assert isinstance(get_ops("numpy"), NumpyOps)
    assert isinstance(get_ops("cupy"), CupyOps)
    # If Apple ops are available, "cpu" should return AppleOps or
    # NumpyOps otherwise.
    try:
        from thinc_apple_ops import AppleOps

        assert isinstance(get_ops("cpu"), AppleOps)
    except ImportError:
        assert isinstance(get_ops("cpu"), NumpyOps)
    # If BigEndian ops are available, "cpu" should return BigEndianOps or
    # NumpyOps otherwise.
    try:
        from thinc_bigendian_ops import BigEndianOps

        assert isinstance(get_ops("cpu"), BigEndianOps)
    except ImportError:
        assert isinstance(get_ops("cpu"), NumpyOps)
    with pytest.raises(ValueError):
        get_ops("blah")
    ops = Ops(numpy)
    assert ops.xp == numpy
    assert arr.shape == (8, 3, 4)
    assert size_at_t[0] == 3
    assert size_at_t[1] == 3
    assert size_at_t[2] == 2
    assert size_at_t[3] == 2
    assert size_at_t[4] == 2
    assert size_at_t[5] == 1
    assert size_at_t[6] == 1
    assert size_at_t[7] == 1
    unpadded = ops.padded2list(padded)
    assert unpadded[0].shape == (5, 4)
    assert unpadded[1].shape == (8, 4)
    assert unpadded[2].shape == (2, 4)


@pytest.mark.parametrize("ops", [Ops(), NumpyOps()])
@pytest.mark.parametrize("nO,nI", [(1, 2), (2, 2), (100, 200), (9, 6)])
def test_LSTM_init_with_sizes(ops, nO, nI):
    model = with_padded(LSTM(nO, nI, depth=1)).initialize()
    for node in model.walk():
        # Assign the backend under test to every node.
        node.ops = ops
        # Check no unallocated params.
        assert node.has_param("LSTM") is not None
        assert node.has_param("HC0") is not None
    for node in model.walk():
        # Check param sizes.
        if node.has_param("LSTM"):
            params = node.get_param("LSTM")
            assert params.shape == (
                (nO * 4 * nI) + (nO * 4) + (nO * 4 * nO + nO * 4),
            )
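
# The expected size works out as follows for a single direction at depth 1
# (a sketch of the arithmetic, using the standard LSTM parameter shapes):
#   input weights  W_ih: 4 gates x nO x nI  -> nO * 4 * nI
#   input biases   b_ih: 4 gates x nO       -> nO * 4
#   hidden weights W_hh: 4 gates x nO x nO  -> nO * 4 * nO
#   hidden biases  b_hh: 4 gates x nO       -> nO * 4
# e.g. nO=2, nI=2 gives 16 + 8 + 16 + 8 = 48 parameters in the flat buffer.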
def _get_drop_mask(ops: Ops, nO: int, rate: Optional[float]) -> Optional[Floats1d]:
    if rate is not None:
        mask = ops.get_dropout_mask((nO,), rate)
        return mask  # type: ignore
    return None
def _handle_empty(ops: Ops, nO: int):
    # Return an empty Ragged output and a backprop callback that yields no gradient.
    return Ragged(ops.alloc2f(0, nO), ops.alloc1i(0)), lambda d_ragged: []
import numpy
import pytest
from hypothesis import given, settings
from hypothesis.strategies import composite, integers
from numpy.testing import assert_allclose
from thinc.api import NumpyOps, CupyOps, Ops, get_ops
from thinc.api import get_current_ops, use_ops
from thinc.api import fix_random_seed
from thinc.api import LSTM
import inspect

from .. import strategies
from ..strategies import ndarrays_of_shape

MAX_EXAMPLES = 10

VANILLA_OPS = Ops(numpy)  # type:ignore
NUMPY_OPS = NumpyOps()
BLIS_OPS = NumpyOps(use_blis=True)
CPU_OPS = [NUMPY_OPS, VANILLA_OPS]
XP_OPS = [NUMPY_OPS]
if CupyOps.xp is not None:
    XP_OPS.append(CupyOps())
ALL_OPS = XP_OPS + [VANILLA_OPS]


@pytest.mark.parametrize("op", [NumpyOps, CupyOps])
def test_ops_consistency(op):
    """Test that specific ops don't define any methods that are not on the
    Ops base class and that all ops methods define the exact same arguments."""
    attrs = [m for m in dir(op) if not m.startswith("_")]
    for attr in attrs:
def _get_drop_mask(ops: Ops, nO: int, rate: Optional[float]) -> Optional[Floats1d]:
    return ops.get_dropout_mask((nO,), rate) if rate is not None else None
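
# Usage sketch (illustrative values): in current thinc versions,
# get_dropout_mask produces an inverted-dropout mask, so kept positions are
# scaled by 1 / (1 - rate) and the masked activations keep the same expected
# value. With rate=0.5, entries are therefore 0.0 (dropped) or 2.0 (kept).
from thinc.api import NumpyOps

ops = NumpyOps()
mask = _get_drop_mask(ops, 4, 0.5)
if mask is not None:
    assert set(mask.tolist()) <= {0.0, 2.0}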