def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
    """
    Build the symbolic recurrence of an LSTM layer with ``theano.scan``.

    :param tparams: mapping read at the keys ``_p(prefix, 'W')``,
        ``_p(prefix, 'U')`` and ``_p(prefix, 'b')``.
    :param state_below: symbolic input. Its first axis gives the number of
        scan steps; when it has 3 dimensions its second axis gives the
        number of samples, otherwise a single sample is assumed.
    :param options: mapping; only ``options['dim_proj']`` is read here.
    :param prefix: name prefix used to build the parameter keys and the
        scan name.
    :param mask: required even though it defaults to ``None`` (an assertion
        enforces this). It is iterated by scan alongside the projected
        input and indexed per step as ``m_[:, None]``.
    :return: the first scan output, i.e. the per-step ``h`` values produced
        by the step function (the cell states are discarded).
    """
    nsteps = state_below.shape[0]
    n_samples = state_below.shape[1] if state_below.ndim == 3 else 1

    assert mask is not None

    def _gate_block(tensor, index, width):
        # Pick the index-th chunk of size `width` along the last axis.
        if tensor.ndim == 3:
            return tensor[:, :, index * width:(index + 1) * width]
        return tensor[:, index * width:(index + 1) * width]

    def _recurrence(m_, x_, h_prev, c_prev):
        # Recurrent contribution plus the pre-projected input for this step.
        preact = T.dot(h_prev, tparams[_p(prefix, 'U')]) + x_

        in_gate = T.nnet.sigmoid(_gate_block(preact, 0, dim_proj))
        forget_gate = T.nnet.sigmoid(_gate_block(preact, 1, dim_proj))
        out_gate = T.nnet.sigmoid(_gate_block(preact, 2, dim_proj))
        candidate = T.tanh(_gate_block(preact, 3, dim_proj))

        # Where the mask is 1 the freshly computed value is used; where it
        # is 0 the previous state is carried through unchanged.
        keep = m_[:, None]
        carry = (1. - m_)[:, None]

        c_new = forget_gate * c_prev + in_gate * candidate
        c_new = keep * c_new + carry * c_prev

        h_new = out_gate * T.tanh(c_new)
        h_new = keep * h_new + carry * h_prev

        return h_new, c_new

    # Input-to-hidden projection is done once for every step, outside scan.
    state_below = (T.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])

    dim_proj = options['dim_proj']

    def _zero_state():
        # Initial hidden/cell state: zeros of shape (n_samples, dim_proj).
        return T.alloc(numpy_floatX(0.), n_samples, dim_proj)

    outputs, _ = theano.scan(_recurrence,
                             sequences=[mask, state_below],
                             outputs_info=[_zero_state(), _zero_state()],
                             name=_p(prefix, '_layers'),
                             n_steps=nsteps)
    return outputs[0]
def param_init_lstm(options, params, prefix='lstm'):
    """
    Add the LSTM parameters ``W``, ``U`` and ``b`` to ``params``.

    ``W`` and ``U`` are each built from four ``ortho_weight(dim_proj)``
    blocks concatenated along axis 1; ``b`` is a zero vector of length
    ``4 * dim_proj`` cast to ``config.floatX``. The entries are stored
    under ``_p(prefix, 'W')``, ``_p(prefix, 'U')`` and ``_p(prefix, 'b')``.
    ``params`` is modified in place and also returned.

    :see: init_params
    """
    dim = options['dim_proj']

    def _four_blocks():
        # NOTE(review): presumably each block is (dim, dim), giving a
        # (dim, 4 * dim) matrix -- confirm against ortho_weight.
        return numpy.concatenate([ortho_weight(dim) for _ in range(4)],
                                 axis=1)

    params[_p(prefix, 'W')] = _four_blocks()
    params[_p(prefix, 'U')] = _four_blocks()

    # Only the bias is explicitly cast to floatX here; W and U keep whatever
    # dtype ortho_weight returns.
    bias = numpy.zeros((4 * dim,))
    params[_p(prefix, 'b')] = bias.astype(config.floatX)

    return params
def _step(m_, x_, h_, c_):
    """
    Compute one masked LSTM update and return the new ``(h, c)`` pair.

    NOTE(review): this function reads ``tparams``, ``prefix``, ``options``
    and ``_slice`` as free names; they must be provided by an enclosing or
    module scope that is not visible here -- confirm where this is meant to
    live.

    :param m_: mask for this step, indexed as ``m_[:, None]``.
    :param x_: term added to the recurrent pre-activation.
    :param h_: previous hidden state.
    :param c_: previous cell state.
    :return: tuple ``(h, c)`` with the updated hidden and cell states.
    """
    preact = T.dot(h_, tparams[_p(prefix, 'U')]) + x_
    width = options['dim_proj']

    in_gate = T.nnet.sigmoid(_slice(preact, 0, width))
    forget_gate = T.nnet.sigmoid(_slice(preact, 1, width))
    out_gate = T.nnet.sigmoid(_slice(preact, 2, width))
    candidate = T.tanh(_slice(preact, 3, width))

    # Mask of 1 takes the new value; mask of 0 keeps the previous state.
    keep = m_[:, None]
    carry = (1. - m_)[:, None]

    new_c = forget_gate * c_ + in_gate * candidate
    new_c = keep * new_c + carry * c_

    new_h = out_gate * T.tanh(new_c)
    new_h = keep * new_h + carry * h_

    return new_h, new_c