def test():
    """Check in-place ``tanh`` (``out=`` aliasing the input) against NumPy.

    The same values are loaded into a NumPy array and an ``lg`` array, each
    is overwritten with its own ``tanh``, and the two results must be equal.
    """
    expected = np.array([-np.pi, 0, np.pi / 2, np.pi])
    actual = lg.array(expected)

    # Both calls write the result back into their own input buffer.
    lg.tanh(actual, out=actual)
    np.tanh(expected, out=expected)

    assert np.array_equal(actual, expected)
def forward(X, WLSTM, c0=None, h0=None):
    """Run an LSTM forward pass over a whole input sequence.

    Args:
        X: input of shape (n, b, input_size), where n is the sequence length
            and b is the batch size.
        WLSTM: weight matrix of shape (1 + input_size + d, 4 * d). Row 0
            multiplies the constant bias input; the columns are grouped as
            input gate, forget gate, output gate and candidate ("gate").
        c0: initial cell state of shape (b, d); zeros when ``None``.
        h0: initial hidden state of shape (b, d); zeros when ``None``.

    Returns:
        A tuple ``(Hout, c_last, h_last, cache)`` where ``Hout`` has shape
        (n, b, d), ``c_last`` / ``h_last`` are the cell and hidden state
        after the final step (so the LSTM can be continued from them), and
        ``cache`` is a dict holding every intermediate array. For an empty
        sequence (n == 0) the initial states ``c0`` / ``h0`` are returned
        as the final states.
    """
    n, b, input_size = X.shape
    d = WLSTM.shape[1] // 4  # hidden size
    if c0 is None:
        c0 = np.zeros((b, d))
    if h0 is None:
        h0 = np.zeros((b, d))

    xphpb = WLSTM.shape[0]  # x plus h plus bias
    Hin = np.zeros((n, b, xphpb))  # input [1, xt, ht-1] to each tick
    Hout = np.zeros((n, b, d))  # hidden representation (gated cell content)
    IFOG = np.zeros((n, b, d * 4))  # input, forget, output, gate pre-activations
    IFOGf = np.zeros((n, b, d * 4))  # after nonlinearity
    C = np.zeros((n, b, d))  # cell content
    Ct = np.zeros((n, b, d))  # tanh of cell content

    # The bias and x parts of the input do not depend on the recurrence,
    # so they are filled in for every tick at once.
    Hin[:, :, 0] = 1
    Hin[:, :, 1:input_size + 1] = X

    # Carrying the previous state explicitly (instead of indexing with the
    # loop variable after the loop) keeps the function valid when n == 0.
    prevh, prevc = h0, c0
    for t in range(n):
        Hin[t, :, input_size + 1:] = prevh

        # Compute all gate pre-activations (most of the work is this line).
        IFOG[t] = Hin[t].dot(WLSTM)

        # Sigmoids for the three gates, tanh for the candidate.
        IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
        IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])

        # Cell update: input_gate * candidate + forget_gate * previous cell.
        C[t] = (IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:]
                + IFOGf[t, :, d:2 * d] * prevc)
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]

        prevh, prevc = Hout[t], C[t]

    cache = {
        "WLSTM": WLSTM,
        "Hout": Hout,
        "IFOGf": IFOGf,
        "IFOG": IFOG,
        "C": C,
        "Ct": Ct,
        "Hin": Hin,
        "c0": c0,
        "h0": h0,
    }

    # The final cell state is returned as well so a caller can continue the
    # LSTM from where this call stopped.
    return Hout, prevc, prevh, cache
def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing):
    """Run a randomly initialised LSTM forward pass and time it.

    Args:
        batch_size: number of sequences processed together.
        hidden_size: width of the hidden / cell state.
        sentence_length: number of time steps.
        word_size: width of each input vector.
        timing: when truthy, print the elapsed time.

    Returns:
        Elapsed wall-clock time in milliseconds.
    """
    start = datetime.datetime.now()

    n, b, d = sentence_length, batch_size, hidden_size

    # The inputs occupy the first ``word_size`` columns of ``Hin``, so their
    # last dimension must be ``word_size`` (it used to be ``hidden_size``,
    # which only worked when the two sizes happened to be equal).
    X = np.random.randn(n, b, word_size)
    h0 = np.random.randn(1, d)
    WLSTM = np.random.randn(word_size + d, 4 * d) / np.sqrt(word_size + d)

    xphpb = WLSTM.shape[0]
    Hin = np.zeros((n, b, xphpb))  # [x_t, h_{t-1}] for every tick
    Hout = np.zeros((n, b, d))
    IFOG = np.zeros((n, b, d * 4))  # gate pre-activations
    IFOGf = np.zeros((n, b, d * 4))  # gate activations
    C = np.zeros((n, b, d))
    Ct = np.zeros((n, b, d))

    for t in range(n):
        # The first tick starts from h0 replicated across the batch.
        prev = np.tile(h0, (b, 1)) if t == 0 else Hout[t - 1]
        Hin[t, :, :word_size] = X[t]
        Hin[t, :, word_size:] = prev

        # Compute all gate pre-activations.
        IFOG[t] = Hin[t].dot(WLSTM)

        # Sigmoids for the three gates, tanh for the candidate.
        IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
        IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])

        # Cell activation; there is no previous cell state at t == 0.
        C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:]
        if t > 0:
            C[t] += IFOGf[t, :, d:2 * d] * C[t - 1]
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]

    # Do a little sum of the outputs to synchronize and check for NaNs.
    checksum = np.sum(Hout)
    assert not math.isnan(checksum)

    stop = datetime.datetime.now()
    elapsed_ms = (stop - start).total_seconds() * 1000.0
    if timing:
        print("Elapsed Time: " + str(elapsed_ms) + " ms")
    return elapsed_ms
def test():
    """Smoke-test a small LSTM forward pass on random data.

    The function makes no assertions on the values; it passes as long as
    every array operation in the forward pass runs without raising.
    """
    word_size = 10
    hidden_size = 10
    sentence_length = 2
    batch_size = 3

    n, b, d = sentence_length, batch_size, hidden_size

    # The inputs fill the first ``word_size`` columns of ``Hin``, so their
    # last dimension is ``word_size``; sizing them by ``hidden_size`` only
    # worked because both constants are 10.
    X = np.random.randn(n, b, word_size)
    h0 = np.random.randn(1, d)
    WLSTM = np.random.randn(word_size + d, 4 * d) / np.sqrt(word_size + d)

    xphpb = WLSTM.shape[0]
    Hin = np.zeros((n, b, xphpb))  # [x_t, h_{t-1}] for every tick
    Hout = np.zeros((n, b, d))
    IFOG = np.zeros((n, b, d * 4))  # gate pre-activations
    IFOGf = np.zeros((n, b, d * 4))  # gate activations
    C = np.zeros((n, b, d))
    Ct = np.zeros((n, b, d))

    for t in range(n):
        # The first tick starts from h0 replicated across the batch.
        prev = np.tile(h0, (b, 1)) if t == 0 else Hout[t - 1]
        Hin[t, :, :word_size] = X[t]
        Hin[t, :, word_size:] = prev

        # Compute all gate pre-activations.
        IFOG[t] = Hin[t].dot(WLSTM)

        # Sigmoids for the three gates, tanh for the candidate.
        IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
        IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])

        # Cell activation; there is no previous cell state at t == 0.
        C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:]
        if t > 0:
            C[t] += IFOGf[t, :, d:2 * d] * C[t - 1]
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]
def test():
    """Compare ``lg`` math functions against NumPy on a complex array.

    ``sin``, ``cos``, ``exp``, ``tanh`` and ``sqrt`` must each agree with
    the NumPy result to within 1e-5 in absolute value on every element.
    """
    # ``np.complex`` was only an alias for the builtin ``complex``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so the builtin is used.
    xn = np.array(
        [[1 + 2j, 3 - 4j, 5 + 6j], [7 - 8j, -9 + 10j, -11 - 12j]], complex
    )
    x = lg.array(xn)

    assert lg.all(lg.abs(lg.sin(x) - np.sin(xn)) < 1e-5)
    assert lg.all(lg.abs(lg.cos(x) - np.cos(xn)) < 1e-5)
    assert lg.all(lg.abs(lg.exp(x) - np.exp(xn)) < 1e-5)
    assert lg.all(lg.abs(lg.tanh(x) - np.tanh(xn)) < 1e-5)
    assert lg.all(lg.abs(lg.sqrt(x) - np.sqrt(xn)) < 1e-5)
def test():
    """Compare ``lg`` element-wise math against NumPy on arrays and scalars."""
    xn = np.array([[1, 2, 3], [4, 5, 6]])
    x = lg.array(xn)

    # Array inputs: the NumPy result is computed first, then the lg result.
    for fn_name in ("sin", "cos", "sqrt", "exp", "log", "absolute"):
        expected = getattr(np, fn_name)(xn)
        actual = getattr(lg, fn_name)(x)
        assert np.allclose(expected, actual)

    # tanh is checked with the lg result as the first argument.
    lg_tanh = lg.tanh(x)
    np_tanh = np.tanh(xn)
    assert np.allclose(lg_tanh, np_tanh)

    # Scalar inputs.
    scalar_cases = (
        ("cos", 0.5),
        ("sqrt", 0.5),
        ("sin", 0.5),
        ("exp", 2),
        ("log", 2),
    )
    for fn_name, arg in scalar_cases:
        y = getattr(lg, fn_name)(arg)
        assert np.allclose(y, getattr(np, fn_name)(arg))

    y = lg.absolute(-3)
    assert y == 3

    # Sigmoid built from exp on a slice of a 3-D array, with a fixed seed.
    np.random.seed(42)
    an = np.random.randn(1, 3, 16)
    bn = 1.0 / (1.0 + np.exp(-an[0, :, :]))
    a = lg.array(an)
    b = 1.0 / (1.0 + lg.exp(-a[0, :, :]))
    assert np.allclose(b, bn)
def test():
    """Check that ``lg.tanh`` on an array equals ``np.tanh`` exactly."""
    reference = np.array([-np.pi, 0, np.pi / 2, np.pi])
    candidate = lg.array(reference)

    got = lg.tanh(candidate)
    want = np.tanh(reference)

    assert np.array_equal(got, want)
def tanh(x):
    """Return the hyperbolic tangent of ``x`` as computed by ``np.tanh``."""
    result = np.tanh(x)
    return result
def test():
    """Check that ``lg.tanh`` on plain scalars equals ``np.tanh`` exactly."""
    scalars = (-np.pi, 0, np.pi / 2, np.pi)
    # Stops at the first mismatch, like a per-value assert would.
    assert all(
        np.array_equal(lg.tanh(value), np.tanh(value)) for value in scalars
    )