def gru_layer(tparams, state_below, options, prefix='gru', mask=None, **kwargs):
    """
    Forward pass through a GRU layer.

    Parameters
    ----------
    tparams : dict-like of shared parameters, keyed via pref(prefix, name);
        expected keys: 'W', 'b', 'U', 'Wx', 'bx', 'Ux'.
    state_below : tensor
        Input sequence. 3-D (nsteps, n_samples, dim_in) for mini-batches,
        2-D for a single sample (then n_samples is taken to be 1).
    options : dict
        Model options (unused here; kept for the uniform layer interface).
    prefix : str
        Parameter-name prefix passed to pref().
    mask : tensor or None
        Per-timestep validity mask; defaults to all ones
        (shape (nsteps, 1), broadcast over samples).

    Returns
    -------
    list
        One-element list holding the hidden-state sequence
        (nsteps, n_samples, dim), matching the other layer functions'
        return convention.
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    # Hidden dimensionality is read off the candidate-state weight matrix.
    dim = tparams[pref(prefix, 'Ux')].shape[1]

    # BUG FIX: `mask == None` builds an elementwise comparison when mask is a
    # tensor; identity (`is None`) is the correct "no mask supplied" test.
    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        # n-th dim-wide chunk along the last axis (gates are concatenated).
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    # Project the inputs for every timestep at once, outside the scan:
    # state_below_ feeds the reset/update gates, state_belowx the candidate.
    state_below_ = tensor.dot(state_below, tparams[pref(prefix, 'W')]) + \
        tparams[pref(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[pref(prefix, 'Wx')]) + \
        tparams[pref(prefix, 'bx')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        """One GRU step: m_ = mask, x_ = gate input, xx_ = candidate input,
        h_ = previous hidden state, U/Ux = recurrent weights."""
        preact = tensor.dot(h_, U) + x_

        # Reset gate r and update gate z.  NOTE: the update gate was
        # previously named `u`, shadowing the recurrent weight argument;
        # renamed to `z` to remove the shadowing hazard.
        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        z = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        # Candidate hidden state.
        preactx = tensor.dot(h_, Ux) * r + xx_
        h = tensor.tanh(preactx)

        # Leaky integration, then carry the old state where the mask is 0.
        h = z * h_ + (1. - z) * h
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    rval, updates = theano.scan(_step_slice,
                                sequences=seqs,
                                outputs_info=[tensor.alloc(0., n_samples, dim)],
                                non_sequences=[tparams[pref(prefix, 'U')],
                                               tparams[pref(prefix, 'Ux')]],
                                name=pref(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=profile,
                                strict=True)
    return [rval]
def fflayer(tparams, state_below, options, prefix='rconv',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Feedforward pass: affine transform followed by a pointwise nonlinearity.

    Parameters
    ----------
    tparams : dict-like of shared parameters, keyed via pref(prefix, name);
        expected keys: 'W', 'b'.
    state_below : tensor
        Layer input.
    options : dict
        Model options (unused here; kept for the uniform layer interface).
    prefix : str
        Parameter-name prefix passed to pref().
    activ : str
        Python source of the activation, evaluated with eval()
        (e.g. 'lambda x: tensor.tanh(x)').

    Returns
    -------
    tensor
        activ(state_below . W + b).
    """
    # SECURITY NOTE: eval() executes arbitrary code — `activ` must come only
    # from trusted model configuration, never from external input.
    preact = tensor.dot(state_below, tparams[pref(prefix, 'W')]) + \
        tparams[pref(prefix, 'b')]
    return eval(activ)(preact)
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None,
                       ortho=True):
    """
    Initialize parameters for a feedforward layer
    (affine transformation + pointwise nonlinearity).

    Parameters
    ----------
    options : dict
        Model options; supplies the default dimensionality 'dim_proj'.
    params : dict
        Parameter dictionary to populate (mutated and returned).
    prefix : str
        Parameter-name prefix passed to pref().
    nin, nout : int or None
        Input/output sizes; default to options['dim_proj'] when None.
    ortho : bool
        Passed through to norm_weight (orthogonal initialization flag).

    Returns
    -------
    dict
        `params` with 'W' (nin x nout) and 'b' (nout,) entries added.
    """
    # BUG FIX: use identity comparison with None, not `==` (PEP 8 E711).
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[pref(prefix, 'W')] = norm_weight(nin, nout, ortho=ortho)
    params[pref(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')
    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Initialize parameters for a Gated Recurrent Unit (GRU) layer.

    Parameters
    ----------
    options : dict
        Model options; supplies the default dimensionality 'dim_proj'.
    params : dict
        Parameter dictionary to populate (mutated and returned).
    prefix : str
        Parameter-name prefix passed to pref().
    nin : int or None
        Input size; defaults to options['dim_proj'] when None.
    dim : int or None
        Hidden-state size; defaults to options['dim_proj'] when None.

    Returns
    -------
    dict
        `params` with gate weights 'W' (nin x 2*dim), 'b' (2*dim,),
        'U' (dim x 2*dim) and candidate weights 'Wx' (nin x dim),
        'Ux' (dim x dim), 'bx' (dim,) added.
    """
    # BUG FIX: use identity comparison with None, not `==` (PEP 8 E711).
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # Reset and update gates share one concatenated weight matrix each for
    # the input ('W') and the recurrent ('U') projections.
    w = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[pref(prefix, 'W')] = w
    params[pref(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    u = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[pref(prefix, 'U')] = u

    # Candidate hidden-state projection.
    params[pref(prefix, 'Wx')] = norm_weight(nin, dim)
    params[pref(prefix, 'Ux')] = ortho_weight(dim)
    params[pref(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params