def test_LSTM_learns():
    """Check that a single SGD step lowers the squared error of a padded LSTM."""
    fix_random_seed(0)
    nO = 2
    nI = 2

    def sgd(key, weights, gradient):
        # Update the weights in place and hand back a zeroed gradient.
        weights -= 0.001 * gradient
        return weights, gradient * 0

    def squared_error(predictions, targets):
        return sum(((yh - y) ** 2).sum() for yh, y in zip(predictions, targets))

    def residuals(predictions, targets):
        return [yh - y for yh, y in zip(predictions, targets)]

    model = with_padded(LSTM(nO, nI))
    raw_inputs = [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
    raw_targets = [[0.2, 0.2], [0.3, 0.3], [0.4, 0.4]]
    # Each sequence is a single row of features.
    X = [model.ops.asarray(x, dtype="f").reshape((1, -1)) for x in raw_inputs]
    Y = [model.ops.asarray(y, dtype="f").reshape((1, -1)) for y in raw_targets]
    model = model.initialize(X, Y)

    # Loss before any update.
    Yhs, bp_Yhs = model.begin_update(X)
    loss1 = squared_error(Yhs, Y)

    # One training step.
    Yhs, bp_Yhs = model.begin_update(X)
    dXs = bp_Yhs(residuals(Yhs, Y))
    model.finish_update(sgd)

    # Loss after the update.
    Yhs, bp_Yhs = model.begin_update(X)
    dXs = bp_Yhs(residuals(Yhs, Y))  # noqa: F841
    loss2 = squared_error(Yhs, Y)
    assert loss1 > loss2, (loss1, loss2)
def test_LSTM_fwd_bwd_shapes(nO, nI):
    """Backprop through a padded LSTM must return gradients shaped like the input."""
    # The incoming sizes are overridden with fixed values for this check.
    nO = 1
    nI = 2
    rows = [[0.1, 0.1], [-0.1, -0.1], [1.0, 1.0]]
    X = numpy.asarray(rows, dtype="f")
    lstm = LSTM(nO, nI)
    model = with_padded(lstm).initialize(X=[X])
    outputs, get_dXs = model([X], is_train=False)
    # The outputs double as the incoming gradient; only shapes are compared.
    dXs = get_dXs(outputs)
    grad_shape = numpy.vstack(dXs).shape
    input_shape = numpy.vstack([X]).shape
    assert grad_shape == input_shape
def test_BiLSTM_fwd_bwd_shapes(ops, nO, nI, depth, bi, lengths):
    """Gradients from a padded (Bi)LSTM must stack to the same shape as the inputs."""
    Xs = [numpy.ones((n_steps, nI), dtype="f") for n_steps in lengths]
    lstm = LSTM(nO, nI, depth=depth, bi=bi)
    model = with_padded(lstm).initialize(X=Xs)
    # Switch every node in the tree over to the ops under test.
    for layer in model.walk():
        layer.ops = ops
    outputs, get_dXs = model(Xs, is_train=True)
    # The outputs double as the incoming gradient; only shapes are compared.
    dXs = get_dXs(outputs)
    grad_shape = numpy.vstack(dXs).shape
    input_shape = numpy.vstack(Xs).shape
    assert grad_shape == input_shape
def noop_models():
    """Return a list with a fresh noop model wrapped by each with_* wrapper."""
    wrappers = (with_padded, with_array, with_array2d, with_list, with_ragged)
    return [wrap(noop()) for wrap in wrappers]
def test_layers_with_residual(name, kwargs, in_data, out_data):
    """Resolve the named layer inside a residual.v1 wrapper and check data round-trips."""
    inner_cfg = {"@layers": name, **kwargs}
    cfg = {"@layers": "residual.v1", "layer": inner_cfg}
    resolved = registry.resolve({"config": cfg})
    model = resolved["config"]
    if "LSTM" in name:
        model = with_padded(model)
    model.initialize(in_data, out_data)

    # Forward output must match the expected output data.
    Y, backprop = model(in_data, is_train=True)
    assert_data_match(Y, out_data)

    # Feeding the output back as the gradient must match the input data.
    dX = backprop(Y)
    assert_data_match(dX, in_data)
def test_LSTM_fwd_bwd_shapes_simple(ops, nO, nI):
    """Training-mode backprop through a padded LSTM keeps the input shape."""
    # The incoming sizes are overridden with fixed values for this check.
    nO = 1
    nI = 2
    rows = [[0.1, 0.1], [-0.1, -0.1], [1.0, 1.0]]
    X = numpy.asarray(rows, dtype="f")
    lstm = LSTM(nO, nI)
    model = with_padded(lstm).initialize(X=[X])
    # Switch every node in the tree over to the ops under test.
    for layer in model.walk():
        layer.ops = ops
    outputs, get_dXs = model([X], is_train=True)
    # The outputs double as the incoming gradient; only shapes are compared.
    dXs = get_dXs(outputs)
    grad_shape = numpy.vstack(dXs).shape
    input_shape = numpy.vstack([X]).shape
    assert grad_shape == input_shape
def test_layers_from_config(name, kwargs, in_data, out_data):
    """Resolve a layer from config and check forward/backward data match."""
    cfg = {"@layers": name, **kwargs}
    resolved = registry.resolve({"config": cfg})
    model = resolved["config"]
    if "LSTM" in name:
        model = with_padded(model)

    # Run the whole round trip with data validation switched on.
    with data_validation(True):
        model.initialize(in_data, out_data)

        # Forward output must match the expected output data.
        Y, backprop = model(in_data, is_train=True)
        assert_data_match(Y, out_data)

        # Feeding the output back as the gradient must match the input data.
        dX = backprop(Y)
        assert_data_match(dX, in_data)
def test_layers_from_config(name, kwargs, in_data, out_data):
    """Build a layer from a filled config and check forward/backward data match.

    name: Registered layer name, placed under the "@layers" key.
    kwargs: Extra config entries for the layer.
    in_data: Input passed to the model; the backprop result must match it.
    out_data: Expected output; the forward result must match it.
    """
    cfg = {"@layers": name, **kwargs}
    filled = registry.fill_config({"config": cfg})
    model = registry.make_from_config(filled)["config"]
    if "LSTM" in name:
        model = with_padded(model)
    try:
        if "FeatureExtractor" in name:
            # can't validate fake docs
            DATA_VALIDATION.set(False)
        model.initialize(in_data, out_data)
        Y, backprop = model(in_data, is_train=True)
        assert_data_match(Y, out_data)
        dX = backprop(Y)
        assert_data_match(dX, in_data)
    finally:
        # Re-enable validation even when an assertion or call above fails, so
        # a failing case cannot leave it switched off for later tests.
        DATA_VALIDATION.set(True)
def test_layers_from_config(name, kwargs, in_data, out_data):
    """Resolve a layer from config, check data round-trips, then run a prediction."""
    cfg = {"@layers": name, **kwargs}
    resolved = registry.resolve({"config": cfg})
    model = resolved["config"]
    if "LSTM" in name:
        model = with_padded(model)

    # Run the whole check with data validation switched on.
    with data_validation(True):
        model.initialize(in_data, out_data)

        # Forward output must match the expected output data.
        Y, backprop = model(in_data, is_train=True)
        assert_data_match(Y, out_data)

        # Feeding the output back as the gradient must match the input data.
        dX = backprop(Y)
        assert_data_match(dX, in_data)

        # Test that during predictions, no dropout is applied
        model._to_ops(NoDropoutOps())
        model.predict(in_data)
def get_padded_model():
    """Return a with_padded-wrapped model that drops the last feature column.

    The forward pass slices the final column off the Padded data; the backward
    pass allocates a gradient with that column added back.
    """

    def _trim_padded_forward(model, Xp, is_train):
        assert isinstance(Xp, Padded)

        def backprop(dYp):
            grad_out = dYp.data
            dim0, dim1, width = grad_out.shape[0], grad_out.shape[1], grad_out.shape[2]
            # One column wider than the output, to line up with the input.
            grad_in = model.ops.alloc3f(dim0, dim1, width + 1)
            return Padded(grad_in, dYp.size_at_t, dYp.lengths, dYp.indices)

        data = Xp.data
        # Flatten the first two axes, drop the last column, then restore them.
        flat = data.reshape((data.shape[0] * data.shape[1], data.shape[2]))
        trimmed = flat[:, :-1]
        restored = trimmed.reshape(
            (Xp.data.shape[0], Xp.data.shape[1], trimmed.shape[1])
        )
        Yp = Padded(restored, Xp.size_at_t, Xp.lengths, Xp.indices)
        return Yp, backprop

    trim_model = Model("trimpadded", _trim_padded_forward)
    return with_padded(trim_model)
def test_layers_batching_all(name, kwargs, in_data, out_data):
    """Run the batch/unbatch check that fits the layer and its data types."""
    cfg = {"@layers": name, **kwargs}
    model = registry.resolve({"config": cfg})["config"]
    if "expand_window" in name:
        return
    if "LSTM" in name:
        util_batch_unbatch_list(with_padded(model), in_data, out_data)
        return

    def is_2d_array(data):
        return isinstance(data, OPS.xp.ndarray) and data.ndim == 2

    # Both remaining checks need a 2d array as the expected output.
    if is_2d_array(in_data) and is_2d_array(out_data):
        util_batch_unbatch_array(model, in_data, out_data)
    if isinstance(in_data, Ragged) and is_2d_array(out_data):
        util_batch_unbatch_ragged(model, in_data, out_data)
def BiLSTMEncoder(
    width: int, depth: int, dropout: float
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context using bidirectional LSTM layers. Requires PyTorch.

    width (int): The input and output width. These are required to be the
        same, to allow residual connections. This value will be determined by
        the width of the inputs. Recommended values are between 64 and 300.
    depth (int): The number of recurrent layers. A depth of 0 returns a noop
        layer instead of an LSTM.
    dropout (float): Creates a Dropout layer on the outputs of each LSTM layer
        except the last layer. Set to 0 to disable this functionality.
    """
    if depth == 0:
        return noop()
    recurrent = PyTorchLSTM(width, width, bi=True, depth=depth, dropout=dropout)
    return with_padded(recurrent)
def test_LSTM_init_with_sizes(ops, nO, nI):
    """Check that an initialized padded LSTM has its params allocated and sized.

    ops: Ops instance assigned to every node of the model tree.
    nO: Output width passed to LSTM.
    nI: Input width passed to LSTM.
    """
    model = with_padded(LSTM(nO, nI, depth=1)).initialize()
    for node in model.walk():
        # Assign the ops to the node being visited (the original assigned to
        # `model` on every iteration, leaving the other nodes untouched).
        node.ops = ops
        # Check no unallocated params.
        assert node.has_param("LSTM") is not None
        assert node.has_param("HC0") is not None
    for node in model.walk():
        # Check param sizes.
        if node.has_param("LSTM"):
            params = node.get_param("LSTM")
            # The "LSTM" param is a single flat vector of this length.
            n_weights = (nO * 4 * nI) + (nO * 4) + (nO * 4 * nO + nO * 4)
            assert params.shape == (n_weights,)
        if node.has_param("HC0"):
            params = node.get_param("HC0")
            assert params.shape == (2, 1, 1, nO)
def test_LSTM_init_with_sizes(nO, nI):
    """Check that an initialized padded LSTM has its params allocated and sized."""
    model = with_padded(LSTM(nO, nI)).initialize()
    # Expected shape per param name, in the order the checks are made.
    expected_shapes = {
        "W": (nO * 4, nO + nI),
        "b": (nO * 4,),
        "initial_hiddens": (nO,),
        "initial_cells": (nO,),
    }
    # Check no unallocated params.
    for node in model.walk():
        for param_name in expected_shapes:
            assert node.has_param(param_name) is not None
    # Check param sizes.
    for node in model.walk():
        for param_name, shape in expected_shapes.items():
            if node.has_param(param_name):
                assert node.get_param(param_name).shape == shape
def test_benchmark_LSTM_fwd():
    """Time forward passes of a padded LSTM over random batches and print the rate."""
    nO = 128
    nI = 128
    n_batch = 1000
    batch_size = 30
    # Sequence lengths drawn from a normal distribution, floored at 1.
    lengths = numpy.maximum(
        numpy.random.normal(scale=10, loc=30, size=n_batch * batch_size), 1
    )
    uniform_lengths = False
    model = with_padded(LSTM(nO, nI)).initialize()

    batches = []
    for batch_lengths in model.ops.minibatch(batch_size, lengths):
        batch_lengths = list(batch_lengths)
        if uniform_lengths:
            # Every sequence in the batch takes the longest length.
            longest = max(batch_lengths)
            row_counts = [longest for _ in batch_lengths]
        else:
            row_counts = batch_lengths
        batches.append(
            [
                numpy.asarray(
                    numpy.random.uniform(0.0, 1.0, (int(n_rows), nI)), dtype="f"
                )
                for n_rows in row_counts
            ]
        )

    start = timeit.default_timer()
    for Xs in batches:
        # Forward pass only; the backward call was commented out in the original.
        ys, bp_ys = model.begin_update(list(Xs))
    end = timeit.default_timer()

    elapsed = end - start
    n_samples = n_batch * batch_size
    print(
        "--- %i samples in %s seconds (%f samples/s, %.7f s/sample) ---"
        % (n_samples, elapsed, n_samples / elapsed, elapsed / n_samples)
    )
def test_pytorch_lstm_init():
    """A depth-0 PyTorchLSTM under with_padded must be named "with_padded(noop)"."""
    lstm = PyTorchLSTM(2, 2, depth=0)
    model = with_padded(lstm).initialize()
    expected_name = "with_padded(noop)"
    assert model.name == expected_name
def test_lstm_init():
    """A bidirectional LSTM initializes twice; the dropout variant must raise."""
    bi_lstm = LSTM(2, 2, bi=True)
    model = with_padded(bi_lstm).initialize()
    # Initializing a second time must also succeed.
    model.initialize()
    # Building the dropout variant is expected to raise NotImplementedError.
    with pytest.raises(NotImplementedError):
        with_padded(LSTM(2, dropout=0.2))
def test_lstm_init():
    """A padded bidirectional LSTM can be initialized twice without error."""
    bi_lstm = LSTM(2, 2, bi=True)
    model = with_padded(bi_lstm).initialize()
    # Initializing a second time must also succeed.
    model.initialize()