def _one_directional_loop(di):
    # di=0, forward GRU
    # di=1, backward GRU
    xs_list = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    h_list = []
    for x in xs_list:
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
        gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

        W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
        U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

        r = sigmoid.sigmoid(W_r_x + U_r_h)
        z = sigmoid.sigmoid(W_z_x + U_z_h)
        h_bar = tanh.tanh(W_x + r * U_x)
        h_bar = (1 - z) * h_bar + z * h

        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
        else:
            h = h_bar
        h_list.append(h_bar)
    return h, h_list
def _one_directional_loop(di):
    # di=0, forward RNN
    # di=1, backward RNN
    xs_list = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    h_list = []
    for x in xs_list:
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        rnn_in = (
            linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
            linear.linear(h, hws[layer_idx], hbs[layer_idx]))
        if activation == 'tanh':
            h_bar = tanh.tanh(rnn_in)
        elif activation == 'relu':
            h_bar = relu.relu(rnn_in)

        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
        else:
            h = h_bar
        h_list.append(h_bar)
    return h, h_list
def _one_directional_loop(di):
    # di=0, forward RNN
    # di=1, backward RNN
    xs_list = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    h_list = []
    for x in xs_list:
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        rnn_in = (linear.linear(x, xws[layer_idx], xbs[layer_idx]) +
                  linear.linear(h, hws[layer_idx], hbs[layer_idx]))
        if activation == 'tanh':
            h_bar = tanh.tanh(rnn_in)
        elif activation == 'relu':
            h_bar = relu.relu(rnn_in)

        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
        else:
            h = h_bar
        h_list.append(h_bar)
    return h, h_list
def _lstm(x, h, c, w, b):
    # _stack_weight interleaves the four per-gate weight/bias blocks into the
    # gate layout expected by chainer.functions.lstm.
    xw = _stack_weight([w[2], w[0], w[1], w[3]])
    hw = _stack_weight([w[6], w[4], w[5], w[7]])
    xb = _stack_weight([b[2], b[0], b[1], b[3]])
    hb = _stack_weight([b[6], b[4], b[5], b[7]])
    lstm_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
    c_bar, h_bar = lstm.lstm(c, lstm_in)
    return h_bar, c_bar
def f(x, h, c, w, b):
    xw, hw = w
    xb, hb = b
    rnn_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
    if activation == 'tanh':
        return tanh.tanh(rnn_in), None
    elif activation == 'relu':
        return relu.relu(rnn_in), None
def __call__(self, x):
    if self.W.data is None:
        in_size = functools.reduce(operator.mul, x.shape[1:], 1)
        self._initialize_params(in_size)
    self.W.data[self.triu_indices] = 0
    return linear.linear(x, self.W, self.b)
def _gru(x, h, c, w, b):
    xw = concat.concat([w[0], w[1], w[2]], axis=0)
    hw = concat.concat([w[3], w[4], w[5]], axis=0)
    xb = concat.concat([b[0], b[1], b[2]], axis=0)
    hb = concat.concat([b[3], b[4], b[5]], axis=0)

    gru_x = linear.linear(x, xw, xb)
    gru_h = linear.linear(h, hw, hb)

    W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

    r = sigmoid.sigmoid(W_r_x + U_r_h)    # reset gate
    z = sigmoid.sigmoid(W_z_x + U_z_h)    # update gate
    h_bar = tanh.tanh(W_x + r * U_x)      # candidate hidden state
    return (1 - z) * h_bar + z * h, None
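# A minimal NumPy sketch of the gate arithmetic that _gru performs, assuming
# hypothetical small shapes (batch 2, input 4, hidden 3). The weight list `w`
# holds the three input-side matrices followed by the three hidden-side ones,
# mirroring the reset, update, and candidate blocks above; names are
# illustrative only.
import numpy as np

def gru_step(x, h, w, b):
    sigmoid = lambda a: 1.0 / (1.0 + np.exp(-a))
    W_r, W_z, W_h, U_r, U_z, U_h = w
    b_r, b_z, b_h, c_r, c_z, c_h = b
    r = sigmoid(x @ W_r.T + b_r + h @ U_r.T + c_r)             # reset gate
    z = sigmoid(x @ W_z.T + b_z + h @ U_z.T + c_z)             # update gate
    h_bar = np.tanh(x @ W_h.T + b_h + r * (h @ U_h.T + c_h))   # candidate
    return (1 - z) * h_bar + z * h

rng = np.random.RandomState(0)
x = rng.randn(2, 4).astype(np.float32)
h = rng.randn(2, 3).astype(np.float32)
w = [rng.randn(3, 4) for _ in range(3)] + [rng.randn(3, 3) for _ in range(3)]
b = [rng.randn(3) for _ in range(6)]
print(gru_step(x, h, w, b).shape)  # (2, 3)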
def lz3_Linear(_in, _out):
    print(_in.link.__dict__['_params'])
    _out.data[0] = np.zeros(_out.data[0].shape)
    my_linear_out = linear(_in.args[0], _in.link.__dict__['W'],
                           _in.link.__dict__['b'])
    my_linear_out = my_linear_out.reshape(_out.data[0].shape)
    _out.data[0] = my_linear_out.data
def ly2_Linear(_in, _out):
    if var.n < 0:  # No fault injection for Normal System case
        assert _in.args[0].dtype == np.float32, \
            'Unsupported input type {}'.format(_in.args[0].dtype)
        assert _out.dtype == np.float32, \
            'Unsupported out type {}'.format(_out.dtype)
        return

    batch, in_size = _in.args[0].shape
    batch, go_size = _out.data.shape
    detect_flag, layer, node, bit, sa01 = var.faultpat[var.n]

    if layer == 1:
        # Update _in with sa01
        g = _in.args[0].data.copy()
        for i in range(batch):
            normal = g[i][node % in_size]
            v_float, v_uint = bitChange(normal, bit, sa01)
            g[i][node % in_size] = np.float32(v_float)
        if 0:
            print("{:8d} faultpattern={}".format(var.n, var.faultpat[var.n]))
            print(' ' * 8, np.max(_in.args[0]), '=>', np.max(g),
                  np.min(_in.args[0]), '=>', np.min(g))
        _in.args[0].data = g

    # Calculate Linear Layer after fault insertion
    this_linear_out = linear(_in.args[0], _in.link.__dict__['W'],
                             _in.link.__dict__['b'])
    this_linear_out = this_linear_out.reshape(_out.data.shape)
    _out.data = this_linear_out.data
def test_t_is_10_nonzero_c_sequence_output():
    np.random.seed(2)
    N = 1
    T = 10
    C1 = 128
    C2 = 64
    vx = np.random.normal(size=(N, T, C1)).astype(np.float32)
    vw_input = np.random.normal(size=(C1, C2 * 4)).astype(np.float32)
    vw_hidden = np.random.normal(size=(C2, C2 * 4)).astype(np.float32)
    vb = np.random.normal(size=(C2 * 4,)).astype(np.float32)
    vc_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vc_out = vc_in.copy()
    vh_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vh = vh_in

    vw_input_c = _convert_to_chainer_order(vw_input)
    vw_hidden_c = _convert_to_chainer_order(vw_hidden)
    vb_c = _convert_to_chainer_order(vb[None, :])

    vh_sequence = []
    for i in range(T):
        vc_out, vh = lstm(vc_out,
                          linear(vx[:, i, :], vw_input_c.T) +
                          linear(vh, vw_hidden_c.T) + vb_c)
        vh_sequence.append(vh.data)
    vh = np.array(vh_sequence).transpose((1, 0, 2))  # TNC -> NTC
    vc_out = vc_out.data

    x = Variable(vx.shape, order=OrderNTC)
    c_in = ConstantVariable(vc_in, order=OrderNC)
    vh_in = ConstantVariable(vh_in, order=OrderNC)
    w_input = ConstantVariable(vw_input, order=OrderCN)
    w_hidden = ConstantVariable(vw_hidden, order=OrderCN)
    b = ConstantVariable(vb, order=OrderC)
    y, c_out = LSTM(None, return_sequences=True, use_bias=True,
                    use_initial_c=True, use_initial_h=True,
                    activation="tanh", recurrent_activation="sigmoid")(
        x, w_input, w_hidden, b, initial_c=c_in, initial_h=vh_in)

    generate_kernel_test_case(
        description=f"LSTM t=10 initial_c,initial_h=nonzero sequence_out",
        backend=["webassembly", "webgpu"],
        graph=Graph([x], [y, c_out]),
        inputs={x: vx},
        expected={y: vh, c_out: vc_out},
        EPS=1e-3,
        ABS_EPS=1e-7
    )
def __call__(self, x): """Applies the linear layer. However, I checked this code for simple data, It does not work... Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.U.data is None or self.V.data is not None: in_size = x.shape[1] self._initialize_params(in_size) # x: (batch_size, CxHxW) # V: (CxHxW, k) # W: (k, CxHxW) # (V*(U*x))+b = Wx + b W1 = linear.linear(x, self.U) return linear.linear(W1, self.V, self.b)
def __call__(self, x, W=None, b=None): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.has_uninitialized_params: with cuda.get_device(self._device_id): self._initialize_params(x.size // x.shape[0]) if W is not None: return linear.linear(x, W, b) return linear.linear(x, self.W, self.b)
def __call__(self, x, W=None, b=None): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.has_uninitialized_params: with cuda.get_device_from_id(self._device_id): self._initialize_params(x.size // x.shape[0]) if W is not None: return linear.linear(x, W, b) return linear.linear(x, self.W, self.b)
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.data is None: self._initialize_params(x.size // x.shape[0]) return linear.linear(x, self.W_bar, self.b)
def _one_directional_loop(di):
    # di=0, forward LSTM
    # di=1, backward LSTM
    h_list = []
    c_list = []
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    c = cx[layer_idx]
    if di == 0:
        xs_list = xs_next
    else:
        xs_list = reversed(xs_next)
    counter = 0
    for x in xs_list:
        counter += 1
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
            c, c_rest = split_axis.split_axis(c, [batch], axis=0)
        else:
            h_rest = None
            c_rest = None

        if layer != 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        if counter == 4:
            # At the fourth step only the input-to-hidden projection feeds
            # the LSTM cell; the recurrent term is skipped.
            lstm_in = linear.linear(x, xws[layer_idx], xbs[layer_idx])
        else:
            lstm_in = linear.linear(
                x, xws[layer_idx], xbs[layer_idx]) + linear.linear(
                    h, hws[layer_idx], hbs[layer_idx])
        c_bar, h_bar = lstm.lstm(c, lstm_in)
        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
            c = concat.concat([c_bar, c_rest], axis=0)
        else:
            h = h_bar
            c = c_bar
        h_list.append(h_bar)
        c_list.append(c_bar)
    return h, c, h_list, c_list
def __call__(self, x, W_lateral, b_lateral):
    lstm_in = x
    if self.h is not None:
        lstm_in += linear.linear(self.h, W_lateral, b_lateral)
    if self.c is None:
        xp = self.xp
        self.c = variable.Variable(
            xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype))
    self.c, self.h = lstm.lstm(self.c, lstm_in)
    return self.h
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ return linear.linear(x, self.W, self.b)
def __call__(self, x, W_lateral, W_peephole, b_peephole):
    lstm_in = x
    if self.h is not None:
        lstm_in += linear.linear(self.h, W_lateral)
    if self.c is None:
        xp = self.xp
        self.c = variable.Variable(
            xp.zeros((len(x.data), self.state_size), dtype=x.data.dtype),
            volatile='auto')
    self.c, self.h = lstm_peephole(self.c, lstm_in, W_peephole, b_peephole)
    return self.h
def max_singular_value(W, u=None, Ip=1):
    """Estimate the largest singular value of ``W`` by power iteration."""
    xp = cuda.get_array_module(W.data)
    if u is None:
        u = xp.random.normal(size=(1, W.shape[0])).astype(xp.float32)
    _u = u
    for _ in range(Ip):
        _v = _l2normalize(xp.dot(_u, W.data), eps=1e-12)
        _u = _l2normalize(xp.dot(_v, W.data.transpose()), eps=1e-12)
    sigma = sum.sum(linear.linear(_u, transpose.transpose(W)) * _v)
    return sigma, _u, _v
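# A standalone NumPy sketch of the same power-iteration estimate, checked
# against np.linalg.svd. The shapes and iteration count are hypothetical, and
# the small helper below stands in for the _l2normalize used above.
import numpy as np

def _l2norm(v, eps=1e-12):
    return v / (np.linalg.norm(v) + eps)

rng = np.random.RandomState(0)
W = rng.randn(32, 64).astype(np.float32)
u = rng.randn(1, 32).astype(np.float32)
for _ in range(30):                       # more iterations -> tighter estimate
    v = _l2norm(u @ W)                    # right singular vector estimate
    u = _l2norm(v @ W.T)                  # left singular vector estimate
sigma = float((u @ W * v).sum())          # u W v^T, largest singular value estimate
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # should nearly agree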
def __call__(self, xnext, eow, h1, h2, h3, W1, W2, W3, b1, b2, b3, prob_bias):
    """
    xnext: next state of a pen. ndim=(batchsize, 3)
    h1, h2, h3: input vectors
    W1, W2, W3: (h.shape[1], 1 + mix_size * 6)
    b1, b2, b3: (1, 1 + mix_size * 6)
    prob_bias: probability bias
    """
    mix_size = self.mix_size
    y = linear.linear(h1, W1, b1)
    y += linear.linear(h2, W2, b2)
    y += linear.linear(h3, W3, b3)
    eos_hat, pi_hat, mu1_hat, mu2_hat, sg1_hat, sg2_hat, rho_hat = \
        chainer.functions.split_axis(
            y,
            numpy.asarray([1, 1 + mix_size, 1 + 2 * mix_size,
                           1 + 3 * mix_size, 1 + 4 * mix_size,
                           1 + 5 * mix_size]),
            axis=1)
    self.loss, self.xpred, self.eos, self.pi_, self.mux, self.muy, \
        self.sgx, self.sgy, self.rho = mixture_density_outputs(
            xnext, eow, eos_hat, pi_hat * (1. + prob_bias), mu1_hat, mu2_hat,
            sg1_hat - prob_bias, sg2_hat - prob_bias, rho_hat)
    return (self.loss, self.xpred, self.eos, self.pi_, self.mux, self.muy,
            self.sgx, self.sgy, self.rho)
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.data is None: self._initialize_params(x.size // x.shape[0]) return linear.linear(x, self.W, self.b)
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.has_uninitialized_params: self._initialize_params(x.shape[1]) return linear.linear(x, self.W, self.b)
def _one_directional_loop(di):
    # di=0, forward LSTM
    # di=1, backward LSTM
    h_list = []
    c_list = []
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    c = cx[layer_idx]
    if di == 0:
        xs_list = xs_next
    else:
        xs_list = reversed(xs_next)
    for x in xs_list:
        batch = x.shape[0]
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
            c, c_rest = split_axis.split_axis(c, [batch], axis=0)
        else:
            h_rest = None
            c_rest = None

        if layer != 0:
            x = dropout.dropout(x, ratio=dropout_ratio, train=train)

        lstm_in = linear.linear(x, xws[layer_idx], xbs[layer_idx]) + \
            linear.linear(h, hws[layer_idx], hbs[layer_idx])
        c_bar, h_bar = lstm.lstm(c, lstm_in)
        if h_rest is not None:
            h = concat.concat([h_bar, h_rest], axis=0)
            c = concat.concat([c_bar, c_rest], axis=0)
        else:
            h = h_bar
            c = c_bar
        h_list.append(h_bar)
        c_list.append(c_bar)
    return h, c, h_list, c_list
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.data is None: in_size = functools.reduce(operator.mul, x.shape[1:], 1) self._initialize_params(in_size) return linear.linear(x, self.W, self.b)
def __call__(self, x): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. Returns: ~chainer.Variable: Output of the linear layer. """ if self.has_uninitialized_params: with cuda.get_device(self._device_id): self._initialize_params(x.size // len(x.data)) return linear.linear(x, self.W, self.b)
def l2_Linear(_in, _out):
    _name = 'l2_Linear'
    owner_layer = 3
    # _in.args[0].shape : batch, in_size (1024, 300)
    # _out.shape        : batch, go_size (1024, 10)
    batch, in_size = _in.args[0].data.shape
    batch, go_size = _out.data.shape

    if var.n < 0:  # No fault injection for Normal System case
        print(_name, 'in/out =', _in.args[0].data.shape, _out.data.shape)
        assert _in.args[0].dtype == np.float32, \
            'Unsupported input type {}'.format(_in.args[0].dtype)
        assert _out.dtype == np.float32, \
            'Unsupported out type {}'.format(_out.dtype)
        # return

    detect_flag, layer, node, bit, sa01 = var.faultpat[
        var.n] if var.n >= 0 else [False, -1, -1, -1, -1]

    if layer == owner_layer or var.pi == _name:
        g = _in.args[0].data.copy()

        # Update _in as PI
        if var.pi == _name:
            for i in range(np.prod(g.shape)):
                g.reshape(-1)[i] = var.PIpat.reshape(-1)[i]

        # Update _in with sa01
        if layer == owner_layer:
            for i in range(batch):
                normal = g.reshape(batch, -1)[i, node]
                v_float, v_uint = bitChange(normal, bit, sa01)
                g.reshape(batch, -1)[i, node] = np.float32(v_float)

        # _in.args[0].data = g

        # Calculate Linear Layer after fault insertion
        this_linear_out = linear(g, _in.link.__dict__['W'],
                                 _in.link.__dict__['b'])
        this_linear_out = this_linear_out.reshape(_out.data.shape)
        _out.data = this_linear_out.data

    if layer == 4:
        g = _out.data.copy()
        for i in range(batch):
            normal = g.reshape(batch, -1)[i, node]
            v_float, v_uint = bitChange(normal, bit, sa01)
            g.reshape(batch, -1)[i, node] = np.float32(v_float)
        if 0:
            print("{:8d} faultpattern={}".format(var.n, var.faultpat[var.n]))
            print(' ' * 8, np.max(_out.data), '=>', np.max(g),
                  np.min(_out.data), '=>', np.min(g))
        _out.data = g
def lz3_Linear(_in, _out):
    _out.data[0] = np.zeros(_out.data[0].shape)
    # print("_in", _in.args[0].shape)
    # _in.args = np.zeros(_in.args[0].shape, dtype=np.float32)
    print(_out.data[0].shape)
    my_linear_out = linear(_in.args[0], _in.link.__dict__['W'],
                           _in.link.__dict__['b'])
    my_linear_out = my_linear_out.reshape(_out.data[0].shape)
    print(my_linear_out.shape)
    print(my_linear_out)
    set_trace()
    _out.data[0] = my_linear_out.data
    print(_out)
def lx1_Linear(_in,_out): # n=0 n=var.n if n<0: return # No fault injection for Normal System case num = 1 d=mylist() print("mylist:",n,d[n]) x=d[n][0] y=d[n][1] g = func(_in.args[0], x, y, num, 1) _in.args[0].data = g my_linear_out = linear(_in.args[0], _in.link.__dict__['W'], _in.link.__dict__['b']) my_linear_out = my_linear_out.reshape(_out.data.shape) _out.data = my_linear_out.data
def __call__(self, cs, ls, h, W, b):
    """
    cs: one-hot-encoding of a length U character sequence
    h: input vector (summation of affine transformation of outputs
       from hidden layers)
    """
    mix_size = self.mix_size
    y = linear.linear(h, W, b)
    a_hat, b_hat, k_hat = chainer.functions.split_axis(
        y, numpy.asarray([mix_size, 2 * mix_size]), axis=1)
    if self.k_prev is None:
        xp = self.xp
        self.k_prev = variable.Variable(xp.zeros_like(k_hat.data))
    self.ws, self.k_prev, self.eow = soft_window(
        cs, ls, a_hat, b_hat, k_hat, self.k_prev)
    return self.ws, self.eow
def forward(self, x, n_batch_axes=1): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. n_batch_axes (int): The number of batch axes. The default is 1. The input variable is reshaped into (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor. This should be greater than 0. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.array is None: in_size = utils.size_of_shape(x.shape[1:]) self._initialize_params(in_size) return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
def forward(self, x, n_batch_axes=1): """Applies the linear layer. Args: x (~chainer.Variable): Batch of input vectors. n_batch_axes (int): The number of batch axes. The default is 1. The input variable is reshaped into (:math:`{\\rm n\\_batch\\_axes} + 1`)-dimensional tensor. This should be greater than 0. Returns: ~chainer.Variable: Output of the linear layer. """ if self.W.array is None: in_size = functools.reduce(operator.mul, x.shape[1:], 1) self._initialize_params(in_size) return linear.linear(x, self.W, self.b, n_batch_axes=n_batch_axes)
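# A minimal sketch of what n_batch_axes does for the plain function call,
# assuming Chainer is installed; the (2, 3, 4) input and out_size=5 are
# hypothetical. With n_batch_axes=2 the leading two axes are treated as batch
# axes, so only the last axis is projected.
import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3, 4).astype(np.float32)
W = np.random.randn(5, 4).astype(np.float32)   # (out_size, in_size)
b = np.zeros(5, dtype=np.float32)

y = F.linear(x, W, b, n_batch_axes=2)
print(y.shape)  # (2, 3, 5)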
def lx1_Linear(_in,_out): # n=0 n=var.n if n<0: return # No fault injection for Normal System case num = 1 d=mylist() print("mylist:",n,d[n]) x=d[n][0] y=d[n][1] #g = func(_in.args[0].get(stream=None), x, y, num, 1) #g = _in.args[0].get(stream=None) #_in.args[0][:,x,y,0]=_in.args[0][:,x,y,0] for i in range(10):_in.args[0][0,x,y,0]=1 #_in.args[0].set(G,stream=None) my_linear_out = linear(_in.args[0], _in.link.__dict__['W'], _in.link.__dict__['b']) my_linear_out = my_linear_out.reshape(_out.data.shape) _out.data.set(my_linear_out.data.get(stream=None).copy())
def __call__(self, x):
    if self.mu_w.data is None:
        in_size = functools.reduce(operator.mul, x.shape[1:], 1)
        self._initialize_params(in_size)

    dtype = self.mu_w.dtype
    out_size, in_size = self.mu_w.shape
    if self.factorized:
        eps_i = self._eps(in_size, dtype)
        eps_j = self._eps(out_size, dtype)
        eps_w = self.xp.outer(eps_j, eps_i)
    else:
        eps_w = self._eps((out_size, in_size), dtype)
        eps_j = self._eps(out_size, dtype)
    W = self.mu_w + self.sigma_w * eps_w
    if self.mu_b is None:
        b = None
    else:
        b = self.mu_b.reshape((out_size,)) + self.sigma_b * eps_j
    return linear.linear(x, W, b)
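# A self-contained NumPy sketch of the factorized-noise branch above. The
# f(x) = sign(x) * sqrt(|x|) transform is the usual NoisyNet choice and is an
# assumption here, since the definition of the _eps helper is not shown; all
# shapes are hypothetical.
import numpy as np

def factorized_noise(out_size, in_size, rng):
    f = lambda x: np.sign(x) * np.sqrt(np.abs(x))
    eps_i = f(rng.randn(in_size))         # per-input noise
    eps_j = f(rng.randn(out_size))        # per-output noise
    return np.outer(eps_j, eps_i), eps_j  # rank-1 weight noise, bias noise

rng = np.random.RandomState(0)
mu_w, sigma_w = rng.randn(5, 3), 0.1 * np.ones((5, 3))
mu_b, sigma_b = rng.randn(5), 0.1 * np.ones(5)
eps_w, eps_b = factorized_noise(5, 3, rng)
W = mu_w + sigma_w * eps_w                # noisy weights
b = mu_b + sigma_b * eps_b                # noisy bias
x = rng.randn(2, 3)
print((x @ W.T + b).shape)                # (2, 5)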
def n_step_lstm(n_layers, dropout_ratio, hx, cx, ws, bs, xs, train=True,
                use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`. This function calculates hidden states :math:`h_t` and
    :math:`c_t` for each time :math:`t` from input :math:`x_t`.

    .. math::

       i_t &= \\sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4) \\\\
       f_t &= \\sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5) \\\\
       o_t &= \\sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6) \\\\
       a_t &= \\tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7) \\\\
       c_t &= f_t \\cdot c_{t-1} + i_t \\cdot a_t \\\\
       h_t &= o_t \\cdot \\tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layer, so when :math:`S` layers exist, you need to
    prepare :math:`8S` weight matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greater than :math:`1`, the input
    of the ``k``-th layer is the hidden state ``h_t`` of the ``k-1``-th layer.
    Note that all input variables except those of the first layer may have a
    different shape from the first layer's inputs.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer. Each ``ws[i]`` is a
            list containing eight matrices. ``ws[i][j]`` corresponds to
            :math:`W_j` in the equation. Only ``ws[0][j]`` where
            ``0 <= j < 4`` have ``(I, N)`` shape as they are multiplied with
            input variables. All other matrices have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer. Each ``bs[i]`` is a
            list containing eight vectors. ``bs[i][j]`` corresponds to
            :math:`b_j` in the equation. The shape of each vector is ``(N,)``
            where ``N`` is the dimension of the hidden units.
        xs (list of chainer.Variable): A list of :class:`chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``, and ``I`` is the size of the input
            units. Note that this function supports variable length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list of
            :class:`~chainer.Variable` holding sequences. So ``xs`` needs to
            satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden state whose shape is the same as ``hx``.
        - ``cy`` is an updated cell state whose shape is the same as ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable`. Each element
          ``ys[t]`` holds the hidden states of the last layer corresponding to
          an input ``xs[t]``. Its shape is ``(B_t, N)`` where ``B_t`` is the
          mini-batch size for time ``t``, and ``N`` is the size of the hidden
          units.
          Note that ``B_t`` is the same value as ``xs[t].shape[0]``.

    .. seealso:: :func:`chainer.functions.lstm`

    """
    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)

        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
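# A minimal sketch of the per-step computation performed in the fallback
# branch above, assuming Chainer is installed. The shapes (batch 2, input 3,
# hidden 4) are hypothetical, and the random stacked weights simply stand in
# for the output of _stack_weight.
import numpy as np
import chainer.functions as F

B, I, N = 2, 3, 4
rng = np.random.RandomState(0)
x = rng.randn(B, I).astype(np.float32)
h = rng.randn(B, N).astype(np.float32)
c = rng.randn(B, N).astype(np.float32)
xw = rng.randn(4 * N, I).astype(np.float32)   # stacked input-to-hidden weights
hw = rng.randn(4 * N, N).astype(np.float32)   # stacked hidden-to-hidden weights
xb = np.zeros(4 * N, dtype=np.float32)
hb = np.zeros(4 * N, dtype=np.float32)

lstm_in = F.linear(x, xw, xb) + F.linear(h, hw, hb)   # shape (B, 4N)
c_bar, h_bar = F.lstm(c, lstm_in)                     # one LSTM step
print(h_bar.shape, c_bar.shape)                       # (2, 4) (2, 4)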
def fixed_length_n_step_lstm(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, train=True,
):
    xp = cuda.get_array_module(hx, hx.data)

    if xp is not numpy and cuda.cudnn_enabled and _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(
            itertools.chain((hx, cx), itertools.chain.from_iterable(ws),
                            itertools.chain.from_iterable(bs), (xs,)))
        rnn = FixedLengthNStepLSTMFunction(n_layers, states, train=train)
        ret = rnn(*inputs)
        hy, cy, ys = ret

        _, batch_size, dim = hy.shape
        ys_reshape = F.reshape(ys, (-1, batch_size, dim))  # (length, batch, dim)

        return hy, cy, ys_reshape

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio)
                h = dropout.dropout(h, ratio=dropout_ratio)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)

        # return hy, cy, tuple(ys)
        ys_concat = F.concat(ys, axis=0)
        ys_reshape = F.reshape(
            ys_concat, (-1, ys[0].shape[0], ys[0].shape[1]))  # (length, batch, dim)

        return hy, cy, ys_reshape
def nonlinear(x, W, b=None):
    y = linear.linear(x, W, b)
    return y * y
def n_step_lstm(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, train=True,
        use_cudnn=True):
    """Stacked Long Short-Term Memory function for sequence inputs.

    This function calculates stacked LSTM with sequences. This function gets
    an initial hidden state :math:`h_0`, an initial cell state :math:`c_0`,
    an input sequence :math:`x`, weight matrices :math:`W`, and bias vectors
    :math:`b`. This function calculates hidden states :math:`h_t` and
    :math:`c_t` for each time :math:`t` from input :math:`x_t`.

    .. math::

       i_t &= \\sigma(W_0 x_t + W_4 h_{t-1} + b_0 + b_4) \\\\
       f_t &= \\sigma(W_1 x_t + W_5 h_{t-1} + b_1 + b_5) \\\\
       o_t &= \\sigma(W_2 x_t + W_6 h_{t-1} + b_2 + b_6) \\\\
       a_t &= \\tanh(W_3 x_t + W_7 h_{t-1} + b_3 + b_7) \\\\
       c_t &= f_t \\cdot c_{t-1} + i_t \\cdot a_t \\\\
       h_t &= o_t \\cdot \\tanh(c_t)

    As the function accepts a sequence, it calculates :math:`h_t` for all
    :math:`t` with one call. Eight weight matrices and eight bias vectors are
    required for each layer, so when :math:`S` layers exist, you need to
    prepare :math:`8S` weight matrices and :math:`8S` bias vectors.

    If the number of layers ``n_layers`` is greater than :math:`1`, the input
    of the ``k``-th layer is the hidden state ``h_t`` of the ``k-1``-th layer.
    Note that all input variables except those of the first layer may have a
    different shape from the first layer's inputs.

    Args:
        n_layers(int): Number of layers.
        dropout_ratio(float): Dropout ratio.
        hx (chainer.Variable): Variable holding stacked hidden states.
            Its shape is ``(S, B, N)`` where ``S`` is the number of layers and
            is equal to ``n_layers``, ``B`` is the mini-batch size, and ``N``
            is the dimension of the hidden units.
        cx (chainer.Variable): Variable holding stacked cell states.
            It has the same shape as ``hx``.
        ws (list of list of chainer.Variable): Weight matrices. ``ws[i]``
            represents the weights for the i-th layer. Each ``ws[i]`` is a
            list containing eight matrices. ``ws[i][j]`` corresponds to
            :math:`W_j` in the equation. Only ``ws[0][j]`` where
            ``0 <= j < 4`` have ``(I, N)`` shape as they are multiplied with
            input variables. All other matrices have ``(N, N)`` shape.
        bs (list of list of chainer.Variable): Bias vectors. ``bs[i]``
            represents the biases for the i-th layer. Each ``bs[i]`` is a
            list containing eight vectors. ``bs[i][j]`` corresponds to
            :math:`b_j` in the equation. The shape of each vector is ``(N,)``
            where ``N`` is the dimension of the hidden units.
        xs (list of chainer.Variable): A list of :class:`~chainer.Variable`
            holding input values. Each element ``xs[t]`` holds the input value
            for time ``t``. Its shape is ``(B_t, I)``, where ``B_t`` is the
            mini-batch size for time ``t``, and ``I`` is the size of the input
            units. Note that this function supports variable length sequences.
            When sequences have different lengths, sort the sequences in
            descending order by length and transpose the sorted sequence.
            :func:`~chainer.functions.transpose_sequence` transposes a list of
            :class:`~chainer.Variable` holding sequences. So ``xs`` needs to
            satisfy ``xs[t].shape[0] >= xs[t + 1].shape[0]``.
        train (bool): If ``True``, this function executes dropout.
        use_cudnn (bool): If ``True``, this function uses cuDNN if available.

    Returns:
        tuple: This function returns a tuple containing three elements,
        ``hy``, ``cy`` and ``ys``.

        - ``hy`` is an updated hidden state whose shape is the same as ``hx``.
        - ``cy`` is an updated cell state whose shape is the same as ``cx``.
        - ``ys`` is a list of :class:`~chainer.Variable`. Each element
          ``ys[t]`` holds the hidden states of the last layer corresponding to
          an input ``xs[t]``.
          Its shape is ``(B_t, N)`` where ``B_t`` is the mini-batch size for
          time ``t``, and ``N`` is the size of the hidden units. Note that
          ``B_t`` is the same value as ``xs[t].shape[0]``.

    .. seealso:: :func:`chainer.functions.lstm`

    """
    xp = cuda.get_array_module(hx, hx.data)

    if use_cudnn and xp is not numpy and cuda.cudnn_enabled and \
       _cudnn_version >= 5000:
        states = get_random_state().create_dropout_states(dropout_ratio)
        # flatten all input variables
        inputs = tuple(itertools.chain(
            (hx, cx), itertools.chain.from_iterable(ws),
            itertools.chain.from_iterable(bs), xs))
        rnn = NStepLSTM(n_layers, states, train=train)
        ret = rnn(*inputs)

        hy, cy = ret[:2]
        ys = ret[2:]
        return hy, cy, ys

    else:
        hx = split_axis.split_axis(hx, n_layers, axis=0, force_tuple=True)
        hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
        cx = split_axis.split_axis(cx, n_layers, axis=0, force_tuple=True)
        cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

        xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
        hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
        xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
        hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

        ys = []
        for x in xs:
            batch = x.shape[0]
            h_next = []
            c_next = []
            for layer in six.moves.range(n_layers):
                h = hx[layer]
                c = cx[layer]
                if h.shape[0] > batch:
                    h, h_rest = split_axis.split_axis(h, [batch], axis=0)
                    c, c_rest = split_axis.split_axis(c, [batch], axis=0)
                else:
                    h_rest = None

                x = dropout.dropout(x, ratio=dropout_ratio, train=train)
                h = dropout.dropout(h, ratio=dropout_ratio, train=train)
                lstm_in = linear.linear(x, xws[layer], xbs[layer]) + \
                    linear.linear(h, hws[layer], hbs[layer])

                c_bar, h_bar = lstm.lstm(c, lstm_in)
                if h_rest is not None:
                    h = concat.concat([h_bar, h_rest], axis=0)
                    c = concat.concat([c_bar, c_rest], axis=0)
                else:
                    h = h_bar
                    c = c_bar
                h_next.append(h)
                c_next.append(c)
                x = h_bar
            hx = h_next
            cx = c_next
            ys.append(x)

        hy = stack.stack(hx)
        cy = stack.stack(cx)
        return hy, cy, tuple(ys)
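# A minimal sketch of the input preparation described in the docstring above,
# assuming Chainer is installed; the three sequences of lengths 5, 3, and 2
# are hypothetical. Sequences are sorted by length in descending order and
# transposed so that xs[t].shape[0] >= xs[t + 1].shape[0] holds.
import numpy as np
import chainer.functions as F

I = 4  # input size
seqs = [np.random.randn(n, I).astype(np.float32) for n in (3, 5, 2)]
seqs = sorted(seqs, key=len, reverse=True)   # lengths 5, 3, 2
xs = F.transpose_sequence(seqs)              # list of per-time mini-batches
print([x.shape for x in xs])                 # [(3, 4), (3, 4), (2, 4), (1, 4), (1, 4)]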