import timeit

import numpy
import pytest

from thinc.api import LSTM, fix_random_seed, with_padded

# Assumed import path: `ndarrays_of_shape` is the shared hypothesis strategy
# helper used below; adjust the import to wherever the suite keeps its strategies.
from thinc.tests.strategies import ndarrays_of_shape


def get_lstm_args(depth, dirs, nO, batch_size, nI, draw=None):
    if dirs == 1:
        n_params = (nO * 4) * nI + nO * 4 + nO * 4 * nO + nO * 4
        for _ in range(1, depth):
            n_params += nO * 4 * nO + nO * 4 + nO * 4 * nO + nO * 4
    else:
        n_params = (nO * 2) * nI + nO * 2 + nO * 2 * (nO // 2) + nO * 2
        for _ in range(1, depth):
            n_params += nO * 2 * nO + nO * 2 + nO * 2 * (nO // 2) + nO * 2
        n_params *= 2
    lstm = LSTM(nO, nI, depth=depth, bi=dirs >= 2).initialize()
    assert lstm.get_param("LSTM").size == n_params
    if draw:
        params = draw(ndarrays_of_shape(n_params))
        # For some reason this is crashing hypothesis?
        # size_at_t = draw(ndarrays_of_shape(shape=(batch_size,), lo=1, dtype="int32"))
        size_at_t = numpy.ones(shape=(batch_size,), dtype="int32")
        X = draw(ndarrays_of_shape((int(size_at_t.sum()), nI)))
    else:
        params = numpy.ones((n_params,), dtype="f")
        size_at_t = numpy.ones(shape=(batch_size,), dtype="int32")
        X = numpy.zeros((int(size_at_t.sum()), nI))
    H0 = numpy.zeros((depth, dirs, nO // dirs))
    C0 = numpy.zeros((depth, dirs, nO // dirs))
    return (params, H0, C0, X, size_at_t)
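# Hedged sketch (not part of the original suite): one way `get_lstm_args` might
# be wired into a hypothesis-driven test, using `st.data()` to obtain a draw
# function.  The depth/dirs/nO/batch_size/nI values below are illustrative
# assumptions, and the test name is hypothetical.
from hypothesis import given, settings
from hypothesis import strategies as st


@settings(deadline=None, max_examples=10)
@given(data=st.data())
def test_get_lstm_args_shapes_sketch(data):
    depth, dirs, nO, batch_size, nI = 1, 1, 2, 3, 2
    params, H0, C0, X, size_at_t = get_lstm_args(
        depth, dirs, nO, batch_size, nI, draw=data.draw
    )
    assert H0.shape == (depth, dirs, nO // dirs)
    assert C0.shape == (depth, dirs, nO // dirs)
    assert X.shape == (int(size_at_t.sum()), nI)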
def test_LSTM_learns():
    fix_random_seed(0)
    nO = 2
    nI = 2

    def sgd(key, weights, gradient):
        weights -= 0.001 * gradient
        return weights, gradient * 0

    model = with_padded(LSTM(nO, nI))
    X = [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
    Y = [[0.2, 0.2], [0.3, 0.3], [0.4, 0.4]]
    X = [model.ops.asarray(x, dtype="f").reshape((1, -1)) for x in X]
    Y = [model.ops.asarray(y, dtype="f").reshape((1, -1)) for y in Y]
    model = model.initialize(X, Y)
    Yhs, bp_Yhs = model.begin_update(X)
    loss1 = sum([((yh - y) ** 2).sum() for yh, y in zip(Yhs, Y)])
    Yhs, bp_Yhs = model.begin_update(X)
    dYhs = [yh - y for yh, y in zip(Yhs, Y)]
    dXs = bp_Yhs(dYhs)
    model.finish_update(sgd)
    Yhs, bp_Yhs = model.begin_update(X)
    dYhs = [yh - y for yh, y in zip(Yhs, Y)]
    dXs = bp_Yhs(dYhs)  # noqa: F841
    loss2 = sum([((yh - y) ** 2).sum() for yh, y in zip(Yhs, Y)])
    assert loss1 > loss2, (loss1, loss2)
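# Hedged sketch (an assumption, not the original conftest): the tests below take
# `ops`, `nO`, `nI`, `depth`, `bi` and `lengths` as arguments, which pytest
# resolves from fixtures or parametrization defined elsewhere in the suite.
# A minimal stand-in could look like this; the parameter values are illustrative.
from thinc.api import NumpyOps

XP_OPS = [NumpyOps()]  # assumed list of backends to exercise


@pytest.fixture(params=XP_OPS)
def ops(request):
    return request.param


@pytest.fixture(params=[1, 2])
def nO(request):
    return request.param


@pytest.fixture(params=[1, 2])
def nI(request):
    return request.param


@pytest.fixture(params=[1, 2])
def depth(request):
    return request.param


@pytest.fixture(params=[False, True])
def bi(request):
    return request.param


@pytest.fixture(params=[[1], [1, 3, 5]])
def lengths(request):
    return request.param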
def test_LSTM_fwd_bwd_shapes(nO, nI):
    # The fixture values are overridden here to pin down a simple case.
    nO = 1
    nI = 2
    X = numpy.asarray([[0.1, 0.1], [-0.1, -0.1], [1.0, 1.0]], dtype="f")
    model = with_padded(LSTM(nO, nI)).initialize(X=[X])
    # The backprop callback needs the training-time forward state, so the
    # forward pass has to run with is_train=True before calling it.
    ys, backprop_ys = model([X], is_train=True)
    dXs = backprop_ys(ys)
    assert numpy.vstack(dXs).shape == numpy.vstack([X]).shape
def test_BiLSTM_fwd_bwd_shapes(ops, nO, nI, depth, bi, lengths):
    Xs = [numpy.ones((length, nI), dtype="f") for length in lengths]
    model = with_padded(LSTM(nO, nI, depth=depth, bi=bi)).initialize(X=Xs)
    for node in model.walk():
        node.ops = ops
    ys, backprop_ys = model(Xs, is_train=True)
    dXs = backprop_ys(ys)
    assert numpy.vstack(dXs).shape == numpy.vstack(Xs).shape
def test_LSTM_fwd_bwd_shapes_simple(ops, nO, nI):
    # The fixture values are overridden here to pin down a simple case.
    nO = 1
    nI = 2
    X = numpy.asarray([[0.1, 0.1], [-0.1, -0.1], [1.0, 1.0]], dtype="f")
    model = with_padded(LSTM(nO, nI)).initialize(X=[X])
    for node in model.walk():
        node.ops = ops
    ys, backprop_ys = model([X], is_train=True)
    dXs = backprop_ys(ys)
    assert numpy.vstack(dXs).shape == numpy.vstack([X]).shape
def test_LSTM_init_with_sizes(ops, nO, nI):
    model = with_padded(LSTM(nO, nI, depth=1)).initialize()
    for node in model.walk():
        node.ops = ops
        # Check no unallocated params.
        assert node.has_param("LSTM") is not None
        assert node.has_param("HC0") is not None
    for node in model.walk():
        # Check param sizes.
        if node.has_param("LSTM"):
            params = node.get_param("LSTM")
            assert params.shape == (nO * 4 * nI + nO * 4 + nO * 4 * nO + nO * 4,)
        if node.has_param("HC0"):
            params = node.get_param("HC0")
            assert params.shape == (2, 1, 1, nO)
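# Worked example of the parameter count asserted above (an illustration, not part
# of the original suite): the flat "LSTM" parameter is expected to hold
# nO*4*nI input weights, nO*4*nO recurrent weights and two nO*4 bias blocks for
# the four gates.  For instance, with nO=2 and nI=3 that is
# 2*4*3 + 2*4 + 2*4*2 + 2*4 = 24 + 8 + 16 + 8 = 56 values.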
def test_LSTM_init_with_sizes_legacy_params(nO, nI):
    # Variant of the init test for the older parameter layout
    # ("W", "b", "initial_hiddens", "initial_cells"); renamed so it does not
    # shadow test_LSTM_init_with_sizes above.
    model = with_padded(LSTM(nO, nI)).initialize()
    for node in model.walk():
        # Check no unallocated params.
        assert node.has_param("W") is not None
        assert node.has_param("b") is not None
        assert node.has_param("initial_hiddens") is not None
        assert node.has_param("initial_cells") is not None
    for node in model.walk():
        # Check param sizes.
        if node.has_param("W"):
            W = node.get_param("W")
            assert W.shape == (nO * 4, nO + nI)
        if node.has_param("b"):
            b = node.get_param("b")
            assert b.shape == (nO * 4,)
        if node.has_param("initial_hiddens"):
            initial_hiddens = node.get_param("initial_hiddens")
            assert initial_hiddens.shape == (nO,)
        if node.has_param("initial_cells"):
            initial_cells = node.get_param("initial_cells")
            assert initial_cells.shape == (nO,)
def test_benchmark_LSTM_fwd():
    nO = 128
    nI = 128
    n_batch = 1000
    batch_size = 30
    seq_len = 30
    lengths = numpy.random.normal(scale=10, loc=30, size=n_batch * batch_size)
    lengths = numpy.maximum(lengths, 1)
    batches = []
    uniform_lengths = False
    model = with_padded(LSTM(nO, nI)).initialize()
    for batch_lengths in model.ops.minibatch(batch_size, lengths):
        batch_lengths = list(batch_lengths)
        if uniform_lengths:
            seq_len = max(batch_lengths)
            batch = [
                numpy.asarray(
                    numpy.random.uniform(0.0, 1.0, (int(seq_len), nI)), dtype="f"
                )
                for _ in batch_lengths
            ]
        else:
            batch = [
                numpy.asarray(
                    numpy.random.uniform(0.0, 1.0, (int(seq_len), nI)), dtype="f"
                )
                for seq_len in batch_lengths
            ]
        batches.append(batch)
    start = timeit.default_timer()
    for Xs in batches:
        ys, bp_ys = model.begin_update(list(Xs))
        # _ = bp_ys(ys)
    end = timeit.default_timer()
    n_samples = n_batch * batch_size
    print(
        "--- %i samples in %s seconds (%f samples/s, %.7f s/sample) ---"
        % (n_samples, end - start, n_samples / (end - start), (end - start) / n_samples)
    )
def test_lstm_init():
    model = with_padded(LSTM(2, 2, bi=True)).initialize()
    model.initialize()
    with pytest.raises(NotImplementedError):
        with_padded(LSTM(2, dropout=0.2))